gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
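/* Carries the (up to four) immediate operands that optimal_immediate_sequence
   computes when arm_gen_constant splits a constant into a short insn sequence.  */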
108 struct four_ints
109 {
110   int i[4];
111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx_insn *);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359      multiple times.
360   */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
371   { NULL, 0, 0, false, false, false, NULL, false }
372 };
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block
621 for a particular anchor is -4088 + 1 + 4095 bytes, which is
622 divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
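/* That is, each anchor covers offsets -4088 through +4095, a range of
   4088 + 1 + 4095 = 8184 bytes, and 8184 is a multiple of eight, so
   consecutive anchors stay naturally aligned.  */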
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
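/* With -mrestrict-it (the ARMv8-compatible IT rules) an IT block may hold
   only a single instruction; otherwise Thumb-2 allows up to four.  */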
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
766 /* Which floating point hardware to use.  */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if chip disallows volatile memory access in IT block. */
870 int arm_arch_no_volatile_ce;
872 /* Nonzero if we should use Neon to handle 64-bit operations rather
873    than core registers.  */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* Nonzero if the core has a very small, high-latency, multiply unit. */
908 int arm_m_profile_small_mul = 0;
910 /* The condition codes of the ARM, and the inverse function. */
911 static const char * const arm_condition_codes[] =
912 {
913   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
914   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
915 };
917 /* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
918 int arm_regs_in_sequence[] =
919 {
920   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
921 };
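/* Unified assembler syntax spells register shifts "lsl"; the older divided
   syntax used "asl" for the same operation.  */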
923 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924 #define streq(string1, string2) (strcmp (string1, string2) == 0)
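/* The low registers (r0-r7) that may serve as scratch/work registers in
   Thumb-2 sequences, excluding the hard frame pointer, the stack pointer,
   the program counter and the PIC register.  */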
926 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928 | (1 << PIC_OFFSET_TABLE_REGNUM)))
930 /* Initialization code. */
932 struct processors
933 {
934   const char *const name;
935   enum processor_type core;
936   const char *arch;
937   enum base_architecture base_arch;
938   const unsigned long flags;
939   const struct tune_params *const tune;
940 };
943 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945 prefetch_slots, \
946 l1_size, \
947 l1_line_size
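/* For example, ARM_PREFETCH_BENEFICIAL (4, 32768, 64) simply expands to the
   three comma-separated tune_params fields: 4 prefetch slots, a 32K L1 cache
   and 64-byte cache lines (the numbers here are purely illustrative).  */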
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963   1,                    /* cond_not_taken_branch_cost.  */
964 };
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1033 /* FP SFmode */
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1049 /* FP DFmode */
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1066 /* Vector */
1068 COSTS_N_INSNS (1) /* alu. */
1072 const struct cpu_cost_table cortexa8_extra_costs =
1074 /* ALU */
1076 0, /* arith. */
1077 0, /* logical. */
1078 COSTS_N_INSNS (1), /* shift. */
1079 0, /* shift_reg. */
1080 COSTS_N_INSNS (1), /* arith_shift. */
1081 0, /* arith_shift_reg. */
1082 COSTS_N_INSNS (1), /* log_shift. */
1083 0, /* log_shift_reg. */
1084 0, /* extend. */
1085 0, /* extend_arith. */
1086 0, /* bfi. */
1087 0, /* bfx. */
1088 0, /* clz. */
1089 0, /* rev. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1094 /* MULT SImode */
1096 COSTS_N_INSNS (1), /* simple. */
1097 COSTS_N_INSNS (1), /* flag_setting. */
1098 COSTS_N_INSNS (1), /* extend. */
1099 COSTS_N_INSNS (1), /* add. */
1100 COSTS_N_INSNS (1), /* extend_add. */
1101 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1103 /* MULT DImode */
1105 0, /* simple (N/A). */
1106 0, /* flag_setting (N/A). */
1107 COSTS_N_INSNS (2), /* extend. */
1108 0, /* add (N/A). */
1109 COSTS_N_INSNS (2), /* extend_add. */
1110 0 /* idiv (N/A). */
1113 /* LD/ST */
1115 COSTS_N_INSNS (1), /* load. */
1116 COSTS_N_INSNS (1), /* load_sign_extend. */
1117 COSTS_N_INSNS (1), /* ldrd. */
1118 COSTS_N_INSNS (1), /* ldm_1st. */
1119 1, /* ldm_regs_per_insn_1st. */
1120 2, /* ldm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* loadf. */
1122 COSTS_N_INSNS (1), /* loadd. */
1123 COSTS_N_INSNS (1), /* load_unaligned. */
1124 COSTS_N_INSNS (1), /* store. */
1125 COSTS_N_INSNS (1), /* strd. */
1126 COSTS_N_INSNS (1), /* stm_1st. */
1127 1, /* stm_regs_per_insn_1st. */
1128 2, /* stm_regs_per_insn_subsequent. */
1129 COSTS_N_INSNS (1), /* storef. */
1130 COSTS_N_INSNS (1), /* stored. */
1131 COSTS_N_INSNS (1) /* store_unaligned. */
1134 /* FP SFmode */
1136 COSTS_N_INSNS (36), /* div. */
1137 COSTS_N_INSNS (11), /* mult. */
1138 COSTS_N_INSNS (20), /* mult_addsub. */
1139 COSTS_N_INSNS (30), /* fma. */
1140 COSTS_N_INSNS (9), /* addsub. */
1141 COSTS_N_INSNS (3), /* fpconst. */
1142 COSTS_N_INSNS (3), /* neg. */
1143 COSTS_N_INSNS (6), /* compare. */
1144 COSTS_N_INSNS (4), /* widen. */
1145 COSTS_N_INSNS (4), /* narrow. */
1146 COSTS_N_INSNS (8), /* toint. */
1147 COSTS_N_INSNS (8), /* fromint. */
1148 COSTS_N_INSNS (8) /* roundint. */
1150 /* FP DFmode */
1152 COSTS_N_INSNS (64), /* div. */
1153 COSTS_N_INSNS (16), /* mult. */
1154 COSTS_N_INSNS (25), /* mult_addsub. */
1155 COSTS_N_INSNS (30), /* fma. */
1156 COSTS_N_INSNS (9), /* addsub. */
1157 COSTS_N_INSNS (3), /* fpconst. */
1158 COSTS_N_INSNS (3), /* neg. */
1159 COSTS_N_INSNS (6), /* compare. */
1160 COSTS_N_INSNS (6), /* widen. */
1161 COSTS_N_INSNS (6), /* narrow. */
1162 COSTS_N_INSNS (8), /* toint. */
1163 COSTS_N_INSNS (8), /* fromint. */
1164 COSTS_N_INSNS (8) /* roundint. */
1167 /* Vector */
1169 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa5_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (6), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (4), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1276 const struct cpu_cost_table cortexa7_extra_costs =
1278 /* ALU */
1280 0, /* arith. */
1281 0, /* logical. */
1282 COSTS_N_INSNS (1), /* shift. */
1283 COSTS_N_INSNS (1), /* shift_reg. */
1284 COSTS_N_INSNS (1), /* arith_shift. */
1285 COSTS_N_INSNS (1), /* arith_shift_reg. */
1286 COSTS_N_INSNS (1), /* log_shift. */
1287 COSTS_N_INSNS (1), /* log_shift_reg. */
1288 COSTS_N_INSNS (1), /* extend. */
1289 COSTS_N_INSNS (1), /* extend_arith. */
1290 COSTS_N_INSNS (1), /* bfi. */
1291 COSTS_N_INSNS (1), /* bfx. */
1292 COSTS_N_INSNS (1), /* clz. */
1293 COSTS_N_INSNS (1), /* rev. */
1294 0, /* non_exec. */
1295 true /* non_exec_costs_exec. */
1299 /* MULT SImode */
1301 0, /* simple. */
1302 COSTS_N_INSNS (1), /* flag_setting. */
1303 COSTS_N_INSNS (1), /* extend. */
1304 COSTS_N_INSNS (1), /* add. */
1305 COSTS_N_INSNS (1), /* extend_add. */
1306 COSTS_N_INSNS (7) /* idiv. */
1308 /* MULT DImode */
1310 0, /* simple (N/A). */
1311 0, /* flag_setting (N/A). */
1312 COSTS_N_INSNS (1), /* extend. */
1313 0, /* add. */
1314 COSTS_N_INSNS (2), /* extend_add. */
1315 0 /* idiv (N/A). */
1318 /* LD/ST */
1320 COSTS_N_INSNS (1), /* load. */
1321 COSTS_N_INSNS (1), /* load_sign_extend. */
1322 COSTS_N_INSNS (3), /* ldrd. */
1323 COSTS_N_INSNS (1), /* ldm_1st. */
1324 1, /* ldm_regs_per_insn_1st. */
1325 2, /* ldm_regs_per_insn_subsequent. */
1326 COSTS_N_INSNS (2), /* loadf. */
1327 COSTS_N_INSNS (2), /* loadd. */
1328 COSTS_N_INSNS (1), /* load_unaligned. */
1329 COSTS_N_INSNS (1), /* store. */
1330 COSTS_N_INSNS (3), /* strd. */
1331 COSTS_N_INSNS (1), /* stm_1st. */
1332 1, /* stm_regs_per_insn_1st. */
1333 2, /* stm_regs_per_insn_subsequent. */
1334 COSTS_N_INSNS (2), /* storef. */
1335 COSTS_N_INSNS (2), /* stored. */
1336 COSTS_N_INSNS (1) /* store_unaligned. */
1339 /* FP SFmode */
1341 COSTS_N_INSNS (15), /* div. */
1342 COSTS_N_INSNS (3), /* mult. */
1343 COSTS_N_INSNS (7), /* mult_addsub. */
1344 COSTS_N_INSNS (7), /* fma. */
1345 COSTS_N_INSNS (3), /* addsub. */
1346 COSTS_N_INSNS (3), /* fpconst. */
1347 COSTS_N_INSNS (3), /* neg. */
1348 COSTS_N_INSNS (3), /* compare. */
1349 COSTS_N_INSNS (3), /* widen. */
1350 COSTS_N_INSNS (3), /* narrow. */
1351 COSTS_N_INSNS (3), /* toint. */
1352 COSTS_N_INSNS (3), /* fromint. */
1353 COSTS_N_INSNS (3) /* roundint. */
1355 /* FP DFmode */
1357 COSTS_N_INSNS (30), /* div. */
1358 COSTS_N_INSNS (6), /* mult. */
1359 COSTS_N_INSNS (10), /* mult_addsub. */
1360 COSTS_N_INSNS (7), /* fma. */
1361 COSTS_N_INSNS (3), /* addsub. */
1362 COSTS_N_INSNS (3), /* fpconst. */
1363 COSTS_N_INSNS (3), /* neg. */
1364 COSTS_N_INSNS (3), /* compare. */
1365 COSTS_N_INSNS (3), /* widen. */
1366 COSTS_N_INSNS (3), /* narrow. */
1367 COSTS_N_INSNS (3), /* toint. */
1368 COSTS_N_INSNS (3), /* fromint. */
1369 COSTS_N_INSNS (3) /* roundint. */
1372 /* Vector */
1374 COSTS_N_INSNS (1) /* alu. */
1378 const struct cpu_cost_table cortexa12_extra_costs =
1380 /* ALU */
1382 0, /* arith. */
1383 0, /* logical. */
1384 0, /* shift. */
1385 COSTS_N_INSNS (1), /* shift_reg. */
1386 COSTS_N_INSNS (1), /* arith_shift. */
1387 COSTS_N_INSNS (1), /* arith_shift_reg. */
1388 COSTS_N_INSNS (1), /* log_shift. */
1389 COSTS_N_INSNS (1), /* log_shift_reg. */
1390 0, /* extend. */
1391 COSTS_N_INSNS (1), /* extend_arith. */
1392 0, /* bfi. */
1393 COSTS_N_INSNS (1), /* bfx. */
1394 COSTS_N_INSNS (1), /* clz. */
1395 COSTS_N_INSNS (1), /* rev. */
1396 0, /* non_exec. */
1397 true /* non_exec_costs_exec. */
1399 /* MULT SImode */
1402 COSTS_N_INSNS (2), /* simple. */
1403 COSTS_N_INSNS (3), /* flag_setting. */
1404 COSTS_N_INSNS (2), /* extend. */
1405 COSTS_N_INSNS (3), /* add. */
1406 COSTS_N_INSNS (2), /* extend_add. */
1407 COSTS_N_INSNS (18) /* idiv. */
1409 /* MULT DImode */
1411 0, /* simple (N/A). */
1412 0, /* flag_setting (N/A). */
1413 COSTS_N_INSNS (3), /* extend. */
1414 0, /* add (N/A). */
1415 COSTS_N_INSNS (3), /* extend_add. */
1416 0 /* idiv (N/A). */
1419 /* LD/ST */
1421 COSTS_N_INSNS (3), /* load. */
1422 COSTS_N_INSNS (3), /* load_sign_extend. */
1423 COSTS_N_INSNS (3), /* ldrd. */
1424 COSTS_N_INSNS (3), /* ldm_1st. */
1425 1, /* ldm_regs_per_insn_1st. */
1426 2, /* ldm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (3), /* loadf. */
1428 COSTS_N_INSNS (3), /* loadd. */
1429 0, /* load_unaligned. */
1430 0, /* store. */
1431 0, /* strd. */
1432 0, /* stm_1st. */
1433 1, /* stm_regs_per_insn_1st. */
1434 2, /* stm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (2), /* storef. */
1436 COSTS_N_INSNS (2), /* stored. */
1437 0 /* store_unaligned. */
1440 /* FP SFmode */
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1456 /* FP DFmode */
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1473 /* Vector */
1475 COSTS_N_INSNS (1) /* alu. */
1479 const struct cpu_cost_table cortexa15_extra_costs =
1481 /* ALU */
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1500 /* MULT SImode */
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1510 /* MULT DImode */
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1520 /* LD/ST */
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0 /* store_unaligned. */
1541 /* FP SFmode */
1543 COSTS_N_INSNS (17), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (5), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1557 /* FP DFmode */
1559 COSTS_N_INSNS (31), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (2), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1574 /* Vector */
1576 COSTS_N_INSNS (1) /* alu. */
1580 const struct cpu_cost_table v7m_extra_costs =
1582 /* ALU */
1584 0, /* arith. */
1585 0, /* logical. */
1586 0, /* shift. */
1587 0, /* shift_reg. */
1588 0, /* arith_shift. */
1589 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 0, /* log_shift. */
1591 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 0, /* extend. */
1593 COSTS_N_INSNS (1), /* extend_arith. */
1594 0, /* bfi. */
1595 0, /* bfx. */
1596 0, /* clz. */
1597 0, /* rev. */
1598 COSTS_N_INSNS (1), /* non_exec. */
1599 false /* non_exec_costs_exec. */
1602 /* MULT SImode */
1604 COSTS_N_INSNS (1), /* simple. */
1605 COSTS_N_INSNS (1), /* flag_setting. */
1606 COSTS_N_INSNS (2), /* extend. */
1607 COSTS_N_INSNS (1), /* add. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 COSTS_N_INSNS (8) /* idiv. */
1611 /* MULT DImode */
1613 0, /* simple (N/A). */
1614 0, /* flag_setting (N/A). */
1615 COSTS_N_INSNS (2), /* extend. */
1616 0, /* add (N/A). */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 0 /* idiv (N/A). */
1621 /* LD/ST */
1623 COSTS_N_INSNS (2), /* load. */
1624 0, /* load_sign_extend. */
1625 COSTS_N_INSNS (3), /* ldrd. */
1626 COSTS_N_INSNS (2), /* ldm_1st. */
1627 1, /* ldm_regs_per_insn_1st. */
1628 1, /* ldm_regs_per_insn_subsequent. */
1629 COSTS_N_INSNS (2), /* loadf. */
1630 COSTS_N_INSNS (3), /* loadd. */
1631 COSTS_N_INSNS (1), /* load_unaligned. */
1632 COSTS_N_INSNS (2), /* store. */
1633 COSTS_N_INSNS (3), /* strd. */
1634 COSTS_N_INSNS (2), /* stm_1st. */
1635 1, /* stm_regs_per_insn_1st. */
1636 1, /* stm_regs_per_insn_subsequent. */
1637 COSTS_N_INSNS (2), /* storef. */
1638 COSTS_N_INSNS (3), /* stored. */
1639 COSTS_N_INSNS (1) /* store_unaligned. */
1642 /* FP SFmode */
1644 COSTS_N_INSNS (7), /* div. */
1645 COSTS_N_INSNS (2), /* mult. */
1646 COSTS_N_INSNS (5), /* mult_addsub. */
1647 COSTS_N_INSNS (3), /* fma. */
1648 COSTS_N_INSNS (1), /* addsub. */
1649 0, /* fpconst. */
1650 0, /* neg. */
1651 0, /* compare. */
1652 0, /* widen. */
1653 0, /* narrow. */
1654 0, /* toint. */
1655 0, /* fromint. */
1656 0 /* roundint. */
1658 /* FP DFmode */
1660 COSTS_N_INSNS (15), /* div. */
1661 COSTS_N_INSNS (5), /* mult. */
1662 COSTS_N_INSNS (7), /* mult_addsub. */
1663 COSTS_N_INSNS (7), /* fma. */
1664 COSTS_N_INSNS (3), /* addsub. */
1665 0, /* fpconst. */
1666 0, /* neg. */
1667 0, /* compare. */
1668 0, /* widen. */
1669 0, /* narrow. */
1670 0, /* toint. */
1671 0, /* fromint. */
1672 0 /* roundint. */
1675 /* Vector */
1677 COSTS_N_INSNS (1) /* alu. */
1681 #define ARM_FUSE_NOTHING (0)
1682 #define ARM_FUSE_MOVW_MOVT (1 << 0)
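/* Bitmask values for the "Fuseable pairs of instructions" field of the
   tune_params tables below: ARM_FUSE_MOVW_MOVT lets the scheduler keep a
   movw/movt pair adjacent so the core can macro-fuse them.  */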
1684 const struct tune_params arm_slowmul_tune =
1685 {
1686   arm_slowmul_rtx_costs,
1687 NULL,
1688 NULL, /* Sched adj cost. */
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 true, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false, /* Prefer 32-bit encodings. */
1699 false, /* Prefer Neon for stringops. */
1700 8, /* Maximum insns to inline memset. */
1701 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1702 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1703   1                     /* Issue rate.  */
1704 };
1706 const struct tune_params arm_fastmul_tune =
1708 arm_fastmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 1, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8, /* Maximum insns to inline memset. */
1723 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1724 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1725 1 /* Issue rate. */
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune =
1733 arm_fastmul_rtx_costs,
1734 NULL,
1735 NULL, /* Sched adj cost. */
1736 1, /* Constant limit. */
1737 3, /* Max cond insns. */
1738 ARM_PREFETCH_NOT_BENEFICIAL,
1739 true, /* Prefer constant pool. */
1740 arm_default_branch_cost,
1741 false, /* Prefer LDRD/STRD. */
1742 {true, true}, /* Prefer non short circuit. */
1743 &arm_default_vec_cost, /* Vectorizer costs. */
1744 false, /* Prefer Neon for 64-bits bitops. */
1745 false, false, /* Prefer 32-bit encodings. */
1746 false, /* Prefer Neon for stringops. */
1747 8, /* Maximum insns to inline memset. */
1748 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1749 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1750 1 /* Issue rate. */
1753 const struct tune_params arm_xscale_tune =
1755 arm_xscale_rtx_costs,
1756 NULL,
1757 xscale_sched_adjust_cost,
1758 2, /* Constant limit. */
1759 3, /* Max cond insns. */
1760 ARM_PREFETCH_NOT_BENEFICIAL,
1761 true, /* Prefer constant pool. */
1762 arm_default_branch_cost,
1763 false, /* Prefer LDRD/STRD. */
1764 {true, true}, /* Prefer non short circuit. */
1765 &arm_default_vec_cost, /* Vectorizer costs. */
1766 false, /* Prefer Neon for 64-bits bitops. */
1767 false, false, /* Prefer 32-bit encodings. */
1768 false, /* Prefer Neon for stringops. */
1769 8, /* Maximum insns to inline memset. */
1770 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1771 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1772 1 /* Issue rate. */
1775 const struct tune_params arm_9e_tune =
1777 arm_9e_rtx_costs,
1778 NULL,
1779 NULL, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 true, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false, /* Prefer 32-bit encodings. */
1790 false, /* Prefer Neon for stringops. */
1791 8, /* Maximum insns to inline memset. */
1792 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1793 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1794 1 /* Issue rate. */
1797 const struct tune_params arm_marvell_pj4_tune =
1799 arm_9e_rtx_costs,
1800 NULL,
1801 NULL, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 5, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 true, /* Prefer constant pool. */
1806 arm_default_branch_cost,
1807 false, /* Prefer LDRD/STRD. */
1808 {true, true}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false, /* Prefer 32-bit encodings. */
1812 false, /* Prefer Neon for stringops. */
1813 8, /* Maximum insns to inline memset. */
1814 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1815 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1816 2 /* Issue rate. */
1819 const struct tune_params arm_v6t2_tune =
1821 arm_9e_rtx_costs,
1822 NULL,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 5, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 false, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 false, false, /* Prefer 32-bit encodings. */
1834 false, /* Prefer Neon for stringops. */
1835 8, /* Maximum insns to inline memset. */
1836 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1837 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1838 1 /* Issue rate. */
1842 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1843 const struct tune_params arm_cortex_tune =
1845 arm_9e_rtx_costs,
1846 &generic_extra_costs,
1847 NULL, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 false, /* Prefer Neon for stringops. */
1859 8, /* Maximum insns to inline memset. */
1860 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1861 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1862 2 /* Issue rate. */
1865 const struct tune_params arm_cortex_a8_tune =
1867 arm_9e_rtx_costs,
1868 &cortexa8_extra_costs,
1869 NULL, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 false, /* Prefer LDRD/STRD. */
1876 {true, true}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false, /* Prefer 32-bit encodings. */
1880 true, /* Prefer Neon for stringops. */
1881 8, /* Maximum insns to inline memset. */
1882 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1883 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1884 2 /* Issue rate. */
1887 const struct tune_params arm_cortex_a7_tune =
1889 arm_9e_rtx_costs,
1890 &cortexa7_extra_costs,
1891 NULL,
1892 1, /* Constant limit. */
1893 5, /* Max cond insns. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 false, /* Prefer constant pool. */
1896 arm_default_branch_cost,
1897 false, /* Prefer LDRD/STRD. */
1898 {true, true}, /* Prefer non short circuit. */
1899 &arm_default_vec_cost, /* Vectorizer costs. */
1900 false, /* Prefer Neon for 64-bits bitops. */
1901 false, false, /* Prefer 32-bit encodings. */
1902 true, /* Prefer Neon for stringops. */
1903 8, /* Maximum insns to inline memset. */
1904 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1905 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1906 2 /* Issue rate. */
1909 const struct tune_params arm_cortex_a15_tune =
1911 arm_9e_rtx_costs,
1912 &cortexa15_extra_costs,
1913 NULL, /* Sched adj cost. */
1914 1, /* Constant limit. */
1915 2, /* Max cond insns. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 false, /* Prefer constant pool. */
1918 arm_default_branch_cost,
1919 true, /* Prefer LDRD/STRD. */
1920 {true, true}, /* Prefer non short circuit. */
1921 &arm_default_vec_cost, /* Vectorizer costs. */
1922 false, /* Prefer Neon for 64-bits bitops. */
1923 true, true, /* Prefer 32-bit encodings. */
1924 true, /* Prefer Neon for stringops. */
1925 8, /* Maximum insns to inline memset. */
1926 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1927 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1928 3 /* Issue rate. */
1931 const struct tune_params arm_cortex_a53_tune =
1933 arm_9e_rtx_costs,
1934 &cortexa53_extra_costs,
1935 NULL, /* Scheduler cost adjustment. */
1936 1, /* Constant limit. */
1937 5, /* Max cond insns. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 false, /* Prefer constant pool. */
1940 arm_default_branch_cost,
1941 false, /* Prefer LDRD/STRD. */
1942 {true, true}, /* Prefer non short circuit. */
1943 &arm_default_vec_cost, /* Vectorizer costs. */
1944 false, /* Prefer Neon for 64-bits bitops. */
1945 false, false, /* Prefer 32-bit encodings. */
1946 true, /* Prefer Neon for stringops. */
1947 8, /* Maximum insns to inline memset. */
1948 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1949 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1950 2 /* Issue rate. */
1953 const struct tune_params arm_cortex_a57_tune =
1955 arm_9e_rtx_costs,
1956 &cortexa57_extra_costs,
1957 NULL, /* Scheduler cost adjustment. */
1958 1, /* Constant limit. */
1959 2, /* Max cond insns. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 false, /* Prefer constant pool. */
1962 arm_default_branch_cost,
1963 true, /* Prefer LDRD/STRD. */
1964 {true, true}, /* Prefer non short circuit. */
1965 &arm_default_vec_cost, /* Vectorizer costs. */
1966 false, /* Prefer Neon for 64-bits bitops. */
1967 true, true, /* Prefer 32-bit encodings. */
1968 true, /* Prefer Neon for stringops. */
1969 8, /* Maximum insns to inline memset. */
1970 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1971 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1972 3 /* Issue rate. */
1975 const struct tune_params arm_xgene1_tune =
1977 arm_9e_rtx_costs,
1978 &xgene1_extra_costs,
1979 NULL, /* Scheduler cost adjustment. */
1980 1, /* Constant limit. */
1981 2, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost,
1985 true, /* Prefer LDRD/STRD. */
1986 {true, true}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 true, true, /* Prefer 32-bit encodings. */
1990 false, /* Prefer Neon for stringops. */
1991 32, /* Maximum insns to inline memset. */
1992 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1993 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1994 4 /* Issue rate. */
1997 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1998 less appealing. Set max_insns_skipped to a low value. */
2000 const struct tune_params arm_cortex_a5_tune =
2002 arm_9e_rtx_costs,
2003 &cortexa5_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 1, /* Constant limit. */
2006 1, /* Max cond insns. */
2007 ARM_PREFETCH_NOT_BENEFICIAL,
2008 false, /* Prefer constant pool. */
2009 arm_cortex_a5_branch_cost,
2010 false, /* Prefer LDRD/STRD. */
2011 {false, false}, /* Prefer non short circuit. */
2012 &arm_default_vec_cost, /* Vectorizer costs. */
2013 false, /* Prefer Neon for 64-bits bitops. */
2014 false, false, /* Prefer 32-bit encodings. */
2015 true, /* Prefer Neon for stringops. */
2016 8, /* Maximum insns to inline memset. */
2017 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2018 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2019 2 /* Issue rate. */
2022 const struct tune_params arm_cortex_a9_tune =
2024 arm_9e_rtx_costs,
2025 &cortexa9_extra_costs,
2026 cortex_a9_sched_adjust_cost,
2027 1, /* Constant limit. */
2028 5, /* Max cond insns. */
2029 ARM_PREFETCH_BENEFICIAL(4,32,32),
2030 false, /* Prefer constant pool. */
2031 arm_default_branch_cost,
2032 false, /* Prefer LDRD/STRD. */
2033 {true, true}, /* Prefer non short circuit. */
2034 &arm_default_vec_cost, /* Vectorizer costs. */
2035 false, /* Prefer Neon for 64-bits bitops. */
2036 false, false, /* Prefer 32-bit encodings. */
2037 false, /* Prefer Neon for stringops. */
2038 8, /* Maximum insns to inline memset. */
2039 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2040 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2041 2 /* Issue rate. */
2044 const struct tune_params arm_cortex_a12_tune =
2046 arm_9e_rtx_costs,
2047 &cortexa12_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 1, /* Constant limit. */
2050 2, /* Max cond insns. */
2051 ARM_PREFETCH_NOT_BENEFICIAL,
2052 false, /* Prefer constant pool. */
2053 arm_default_branch_cost,
2054 true, /* Prefer LDRD/STRD. */
2055 {true, true}, /* Prefer non short circuit. */
2056 &arm_default_vec_cost, /* Vectorizer costs. */
2057 false, /* Prefer Neon for 64-bits bitops. */
2058 true, true, /* Prefer 32-bit encodings. */
2059 true, /* Prefer Neon for stringops. */
2060 8, /* Maximum insns to inline memset. */
2061 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2062 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2063 2 /* Issue rate. */
2066 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2067 single cycle to execute, so materialising a constant that way costs two
2068 cycles. An LDR from the constant pool likewise takes two cycles to execute,
2069 but mildly increases pipelining opportunity (consecutive loads/stores can be
2070 pipelined together, saving one cycle), and may also improve icache
2071 utilisation. Hence we prefer the constant pool for such processors. */
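/* Illustration (not part of the original comment): the two sequences being
   compared above for materialising a 32-bit constant are roughly

       movw  r0, #:lower16:value   @ one cycle each on Cortex-M4
       movt  r0, #:upper16:value

   versus a single literal-pool load

       ldr   r0, =value            @ ~two cycles, plus a pool entry

   using the approximate cycle counts assumed in the comment above.  */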
2073 const struct tune_params arm_v7m_tune =
2075 arm_9e_rtx_costs,
2076 &v7m_extra_costs,
2077 NULL, /* Sched adj cost. */
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_cortex_m_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {false, false}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8, /* Maximum insns to inline memset. */
2090 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2091 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2092 1 /* Issue rate. */
2095 /* Cortex-M7 tuning. */
2097 const struct tune_params arm_cortex_m7_tune =
2099 arm_9e_rtx_costs,
2100 &v7m_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 0, /* Constant limit. */
2103 1, /* Max cond insns. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 true, /* Prefer constant pool. */
2106 arm_cortex_m7_branch_cost,
2107 false, /* Prefer LDRD/STRD. */
2108 {true, true}, /* Prefer non short circuit. */
2109 &arm_default_vec_cost, /* Vectorizer costs. */
2110 false, /* Prefer Neon for 64-bits bitops. */
2111 false, false, /* Prefer 32-bit encodings. */
2112 false, /* Prefer Neon for stringops. */
2113 8, /* Maximum insns to inline memset. */
2114 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2115 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2116 2 /* Issue rate. */
2119 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2120 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2121 const struct tune_params arm_v6m_tune =
2123 arm_9e_rtx_costs,
2124 NULL,
2125 NULL, /* Sched adj cost. */
2126 1, /* Constant limit. */
2127 5, /* Max cond insns. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 false, /* Prefer constant pool. */
2130 arm_default_branch_cost,
2131 false, /* Prefer LDRD/STRD. */
2132 {false, false}, /* Prefer non short circuit. */
2133 &arm_default_vec_cost, /* Vectorizer costs. */
2134 false, /* Prefer Neon for 64-bits bitops. */
2135 false, false, /* Prefer 32-bit encodings. */
2136 false, /* Prefer Neon for stringops. */
2137 8, /* Maximum insns to inline memset. */
2138 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2139 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2140 1 /* Issue rate. */
2143 const struct tune_params arm_fa726te_tune =
2145 arm_9e_rtx_costs,
2146 NULL,
2147 fa726te_sched_adjust_cost,
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 ARM_PREFETCH_NOT_BENEFICIAL,
2151 true, /* Prefer constant pool. */
2152 arm_default_branch_cost,
2153 false, /* Prefer LDRD/STRD. */
2154 {true, true}, /* Prefer non short circuit. */
2155 &arm_default_vec_cost, /* Vectorizer costs. */
2156 false, /* Prefer Neon for 64-bits bitops. */
2157 false, false, /* Prefer 32-bit encodings. */
2158 false, /* Prefer Neon for stringops. */
2159 8, /* Maximum insns to inline memset. */
2160 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2161 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2162 2 /* Issue rate. */
2166 /* Not all of these give usefully different compilation alternatives,
2167 but there is no simple way of generalizing them. */
2168 static const struct processors all_cores[] =
2170 /* ARM Cores */
2171 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2172 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2173 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2174 #include "arm-cores.def"
2175 #undef ARM_CORE
2176 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2179 static const struct processors all_architectures[] =
2181 /* ARM Architectures */
2182 /* We don't specify tuning costs here as it will be figured out
2183 from the core. */
2185 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2186 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2187 #include "arm-arches.def"
2188 #undef ARM_ARCH
2189 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2193 /* These are populated as commandline arguments are processed, or NULL
2194 if not specified. */
2195 static const struct processors *arm_selected_arch;
2196 static const struct processors *arm_selected_cpu;
2197 static const struct processors *arm_selected_tune;
2199 /* The name of the preprocessor macro to define for this architecture. */
2201 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2203 /* Available values for -mfpu=. */
2205 static const struct arm_fpu_desc all_fpus[] =
2207 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2208 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2209 #include "arm-fpus.def"
2210 #undef ARM_FPU
2214 /* Supported TLS relocations. */
2216 enum tls_reloc {
2217 TLS_GD32,
2218 TLS_LDM32,
2219 TLS_LDO32,
2220 TLS_IE32,
2221 TLS_LE32,
2222 TLS_DESCSEQ /* GNU scheme */
2225 /* The maximum number of insns to be used when loading a constant. */
2226 inline static int
2227 arm_constant_limit (bool size_p)
2229 return size_p ? 1 : current_tune->constant_limit;
2232 /* Emit an insn that's a simple single-set. Both the operands must be known
2233 to be valid. */
2234 inline static rtx_insn *
2235 emit_set_insn (rtx x, rtx y)
2237 return emit_insn (gen_rtx_SET (x, y));
2240 /* Return the number of bits set in VALUE. */
2241 static unsigned
2242 bit_count (unsigned long value)
2244 unsigned long count = 0;
2246 while (value)
2248 count++;
2249 value &= value - 1; /* Clear the least-significant set bit. */
2252 return count;
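/* Worked example (illustrative): for value == 0x2C (binary 101100) the
   loop clears one set bit per iteration,
       101100 -> 101000 -> 100000 -> 0,
   so bit_count returns 3.  This is the classic Kernighan popcount.  */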
2255 typedef struct
2257 machine_mode mode;
2258 const char *name;
2259 } arm_fixed_mode_set;
2261 /* A small helper for setting fixed-point library libfuncs. */
2263 static void
2264 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2265 const char *funcname, const char *modename,
2266 int num_suffix)
2268 char buffer[50];
2270 if (num_suffix == 0)
2271 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2272 else
2273 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2275 set_optab_libfunc (optable, mode, buffer);
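/* Example of the names constructed above (illustrative): add_optab on QQmode
   with num_suffix == 3 yields "__gnu_addqq3", and neg_optab on SAmode with
   num_suffix == 2 yields "__gnu_negsa2", matching the __gnu_-prefixed
   fixed-point routines that libgcc provides for ARM.  */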
2278 static void
2279 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2280 machine_mode from, const char *funcname,
2281 const char *toname, const char *fromname)
2283 char buffer[50];
2284 const char *maybe_suffix_2 = "";
2286 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2287 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2288 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2289 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2290 maybe_suffix_2 = "2";
2292 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2293 maybe_suffix_2);
2295 set_conv_libfunc (optable, to, from, buffer);
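/* Example of the conversion names constructed above (illustrative):
   converting from SImode to SAmode with "fract" yields "__gnu_fractsisa",
   while a fixed-to-fixed conversion of matching signedness and class, such
   as QQmode to HQmode, picks up the "2" suffix and yields
   "__gnu_fractqqhq2", following the naming scheme in fixed-bit.h.  */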
2298 /* Set up library functions unique to ARM. */
2300 static void
2301 arm_init_libfuncs (void)
2303 /* For Linux, we have access to kernel support for atomic operations. */
2304 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2305 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2307 /* There are no special library functions unless we are using the
2308 ARM BPABI. */
2309 if (!TARGET_BPABI)
2310 return;
2312 /* The functions below are described in Section 4 of the "Run-Time
2313 ABI for the ARM architecture", Version 1.0. */
2315 /* Double-precision floating-point arithmetic. Table 2. */
2316 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2317 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2318 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2319 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2320 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2322 /* Double-precision comparisons. Table 3. */
2323 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2324 set_optab_libfunc (ne_optab, DFmode, NULL);
2325 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2326 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2327 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2328 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2329 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2331 /* Single-precision floating-point arithmetic. Table 4. */
2332 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2333 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2334 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2335 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2336 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2338 /* Single-precision comparisons. Table 5. */
2339 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2340 set_optab_libfunc (ne_optab, SFmode, NULL);
2341 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2342 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2343 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2344 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2345 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2347 /* Floating-point to integer conversions. Table 6. */
2348 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2349 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2350 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2351 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2352 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2353 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2354 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2355 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2357 /* Conversions between floating types. Table 7. */
2358 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2359 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2361 /* Integer to floating-point conversions. Table 8. */
2362 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2363 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2364 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2365 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2366 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2367 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2368 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2369 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2371 /* Long long. Table 9. */
2372 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2373 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2374 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2375 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2376 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2377 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2378 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2379 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2381 /* Integer (32/32->32) division. \S 4.3.1. */
2382 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2383 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2385 /* The divmod functions are designed so that they can be used for
2386 plain division, even though they return both the quotient and the
2387 remainder. The quotient is returned in the usual location (i.e.,
2388 r0 for SImode, {r0, r1} for DImode), just as would be expected
2389 for an ordinary division routine. Because the AAPCS calling
2390 conventions specify that all of { r0, r1, r2, r3 } are
2391 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2392 explicitly that those registers are clobbered by these
2393 routines. */
2394 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2395 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2397 /* For SImode division the ABI provides div-without-mod routines,
2398 which are faster. */
2399 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2400 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
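/* Illustration (based on the run-time ABI, not part of the original
   comment): __aeabi_idivmod returns the quotient in r0 and the remainder in
   r1, so when only the quotient is needed the compiler may call the same
   routine and simply ignore r1, while __aeabi_idiv returns just the
   quotient in r0.  */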
2402 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2403 divmod libcalls instead. */
2404 set_optab_libfunc (smod_optab, DImode, NULL);
2405 set_optab_libfunc (umod_optab, DImode, NULL);
2406 set_optab_libfunc (smod_optab, SImode, NULL);
2407 set_optab_libfunc (umod_optab, SImode, NULL);
2409 /* Half-precision float operations. The compiler handles all operations
2410 with NULL libfuncs by converting to SFmode. */
2411 switch (arm_fp16_format)
2413 case ARM_FP16_FORMAT_IEEE:
2414 case ARM_FP16_FORMAT_ALTERNATIVE:
2416 /* Conversions. */
2417 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2418 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2419 ? "__gnu_f2h_ieee"
2420 : "__gnu_f2h_alternative"));
2421 set_conv_libfunc (sext_optab, SFmode, HFmode,
2422 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2423 ? "__gnu_h2f_ieee"
2424 : "__gnu_h2f_alternative"));
2426 /* Arithmetic. */
2427 set_optab_libfunc (add_optab, HFmode, NULL);
2428 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2429 set_optab_libfunc (smul_optab, HFmode, NULL);
2430 set_optab_libfunc (neg_optab, HFmode, NULL);
2431 set_optab_libfunc (sub_optab, HFmode, NULL);
2433 /* Comparisons. */
2434 set_optab_libfunc (eq_optab, HFmode, NULL);
2435 set_optab_libfunc (ne_optab, HFmode, NULL);
2436 set_optab_libfunc (lt_optab, HFmode, NULL);
2437 set_optab_libfunc (le_optab, HFmode, NULL);
2438 set_optab_libfunc (ge_optab, HFmode, NULL);
2439 set_optab_libfunc (gt_optab, HFmode, NULL);
2440 set_optab_libfunc (unord_optab, HFmode, NULL);
2441 break;
2443 default:
2444 break;
2447 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2449 const arm_fixed_mode_set fixed_arith_modes[] =
2451 { QQmode, "qq" },
2452 { UQQmode, "uqq" },
2453 { HQmode, "hq" },
2454 { UHQmode, "uhq" },
2455 { SQmode, "sq" },
2456 { USQmode, "usq" },
2457 { DQmode, "dq" },
2458 { UDQmode, "udq" },
2459 { TQmode, "tq" },
2460 { UTQmode, "utq" },
2461 { HAmode, "ha" },
2462 { UHAmode, "uha" },
2463 { SAmode, "sa" },
2464 { USAmode, "usa" },
2465 { DAmode, "da" },
2466 { UDAmode, "uda" },
2467 { TAmode, "ta" },
2468 { UTAmode, "uta" }
2470 const arm_fixed_mode_set fixed_conv_modes[] =
2472 { QQmode, "qq" },
2473 { UQQmode, "uqq" },
2474 { HQmode, "hq" },
2475 { UHQmode, "uhq" },
2476 { SQmode, "sq" },
2477 { USQmode, "usq" },
2478 { DQmode, "dq" },
2479 { UDQmode, "udq" },
2480 { TQmode, "tq" },
2481 { UTQmode, "utq" },
2482 { HAmode, "ha" },
2483 { UHAmode, "uha" },
2484 { SAmode, "sa" },
2485 { USAmode, "usa" },
2486 { DAmode, "da" },
2487 { UDAmode, "uda" },
2488 { TAmode, "ta" },
2489 { UTAmode, "uta" },
2490 { QImode, "qi" },
2491 { HImode, "hi" },
2492 { SImode, "si" },
2493 { DImode, "di" },
2494 { TImode, "ti" },
2495 { SFmode, "sf" },
2496 { DFmode, "df" }
2498 unsigned int i, j;
2500 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2502 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2503 "add", fixed_arith_modes[i].name, 3);
2504 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2505 "ssadd", fixed_arith_modes[i].name, 3);
2506 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2507 "usadd", fixed_arith_modes[i].name, 3);
2508 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2509 "sub", fixed_arith_modes[i].name, 3);
2510 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2511 "sssub", fixed_arith_modes[i].name, 3);
2512 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2513 "ussub", fixed_arith_modes[i].name, 3);
2514 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2515 "mul", fixed_arith_modes[i].name, 3);
2516 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2517 "ssmul", fixed_arith_modes[i].name, 3);
2518 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2519 "usmul", fixed_arith_modes[i].name, 3);
2520 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2521 "div", fixed_arith_modes[i].name, 3);
2522 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2523 "udiv", fixed_arith_modes[i].name, 3);
2524 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2525 "ssdiv", fixed_arith_modes[i].name, 3);
2526 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2527 "usdiv", fixed_arith_modes[i].name, 3);
2528 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2529 "neg", fixed_arith_modes[i].name, 2);
2530 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2531 "ssneg", fixed_arith_modes[i].name, 2);
2532 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2533 "usneg", fixed_arith_modes[i].name, 2);
2534 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2535 "ashl", fixed_arith_modes[i].name, 3);
2536 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2537 "ashr", fixed_arith_modes[i].name, 3);
2538 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2539 "lshr", fixed_arith_modes[i].name, 3);
2540 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2541 "ssashl", fixed_arith_modes[i].name, 3);
2542 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2543 "usashl", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2545 "cmp", fixed_arith_modes[i].name, 2);
2548 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2549 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2551 if (i == j
2552 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2553 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2554 continue;
2556 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2557 fixed_conv_modes[j].mode, "fract",
2558 fixed_conv_modes[i].name,
2559 fixed_conv_modes[j].name);
2560 arm_set_fixed_conv_libfunc (satfract_optab,
2561 fixed_conv_modes[i].mode,
2562 fixed_conv_modes[j].mode, "satfract",
2563 fixed_conv_modes[i].name,
2564 fixed_conv_modes[j].name);
2565 arm_set_fixed_conv_libfunc (fractuns_optab,
2566 fixed_conv_modes[i].mode,
2567 fixed_conv_modes[j].mode, "fractuns",
2568 fixed_conv_modes[i].name,
2569 fixed_conv_modes[j].name);
2570 arm_set_fixed_conv_libfunc (satfractuns_optab,
2571 fixed_conv_modes[i].mode,
2572 fixed_conv_modes[j].mode, "satfractuns",
2573 fixed_conv_modes[i].name,
2574 fixed_conv_modes[j].name);
2578 if (TARGET_AAPCS_BASED)
2579 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2582 /* On AAPCS systems, this is the "struct __va_list". */
2583 static GTY(()) tree va_list_type;
2585 /* Return the type to use as __builtin_va_list. */
2586 static tree
2587 arm_build_builtin_va_list (void)
2589 tree va_list_name;
2590 tree ap_field;
2592 if (!TARGET_AAPCS_BASED)
2593 return std_build_builtin_va_list ();
2595 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2596 defined as:
2598 struct __va_list
2600 void *__ap;
2603 The C Library ABI further reinforces this definition in \S
2604 4.1.
2606 We must follow this definition exactly. The structure tag
2607 name is visible in C++ mangled names, and thus forms a part
2608 of the ABI. The field name may be used by people who
2609 #include <stdarg.h>. */
2610 /* Create the type. */
2611 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2612 /* Give it the required name. */
2613 va_list_name = build_decl (BUILTINS_LOCATION,
2614 TYPE_DECL,
2615 get_identifier ("__va_list"),
2616 va_list_type);
2617 DECL_ARTIFICIAL (va_list_name) = 1;
2618 TYPE_NAME (va_list_type) = va_list_name;
2619 TYPE_STUB_DECL (va_list_type) = va_list_name;
2620 /* Create the __ap field. */
2621 ap_field = build_decl (BUILTINS_LOCATION,
2622 FIELD_DECL,
2623 get_identifier ("__ap"),
2624 ptr_type_node);
2625 DECL_ARTIFICIAL (ap_field) = 1;
2626 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2627 TYPE_FIELDS (va_list_type) = ap_field;
2628 /* Compute its layout. */
2629 layout_type (va_list_type);
2631 return va_list_type;
2634 /* Return an expression of type "void *" pointing to the next
2635 available argument in a variable-argument list. VALIST is the
2636 user-level va_list object, of type __builtin_va_list. */
2637 static tree
2638 arm_extract_valist_ptr (tree valist)
2640 if (TREE_TYPE (valist) == error_mark_node)
2641 return error_mark_node;
2643 /* On an AAPCS target, the pointer is stored within "struct
2644 va_list". */
2645 if (TARGET_AAPCS_BASED)
2647 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2648 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2649 valist, ap_field, NULL_TREE);
2652 return valist;
2655 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2656 static void
2657 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2659 valist = arm_extract_valist_ptr (valist);
2660 std_expand_builtin_va_start (valist, nextarg);
2663 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2664 static tree
2665 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2666 gimple_seq *post_p)
2668 valist = arm_extract_valist_ptr (valist);
2669 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2672 /* Fix up any incompatible options that the user has specified. */
2673 static void
2674 arm_option_override (void)
2676 arm_selected_arch = NULL;
2677 arm_selected_cpu = NULL;
2678 arm_selected_tune = NULL;
2680 if (global_options_set.x_arm_arch_option)
2681 arm_selected_arch = &all_architectures[arm_arch_option];
2683 if (global_options_set.x_arm_cpu_option)
2685 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2686 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2689 if (global_options_set.x_arm_tune_option)
2690 arm_selected_tune = &all_cores[(int) arm_tune_option];
2692 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2693 SUBTARGET_OVERRIDE_OPTIONS;
2694 #endif
2696 if (arm_selected_arch)
2698 if (arm_selected_cpu)
2700 /* Check for conflict between mcpu and march. */
2701 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2703 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2704 arm_selected_cpu->name, arm_selected_arch->name);
2705 /* -march wins for code generation.
2706 -mcpu wins for default tuning. */
2707 if (!arm_selected_tune)
2708 arm_selected_tune = arm_selected_cpu;
2710 arm_selected_cpu = arm_selected_arch;
2712 else
2713 /* -mcpu wins. */
2714 arm_selected_arch = NULL;
2716 else
2717 /* Pick a CPU based on the architecture. */
2718 arm_selected_cpu = arm_selected_arch;
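/* Illustrative example (assumed behaviour, not from the original source):
   "-march=armv7-a -mcpu=cortex-a8" describes compatible capability sets, so
   no warning is issued and -mcpu simply wins; "-march=armv4t -mcpu=cortex-a8"
   conflicts, so the warning above is emitted, armv4t wins for code
   generation and cortex-a8 still supplies the default tuning.  */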
2721 /* If the user did not specify a processor, choose one for them. */
2722 if (!arm_selected_cpu)
2724 const struct processors * sel;
2725 unsigned int sought;
2727 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2728 if (!arm_selected_cpu->name)
2730 #ifdef SUBTARGET_CPU_DEFAULT
2731 /* Use the subtarget default CPU if none was specified by
2732 configure. */
2733 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2734 #endif
2735 /* Default to ARM6. */
2736 if (!arm_selected_cpu->name)
2737 arm_selected_cpu = &all_cores[arm6];
2740 sel = arm_selected_cpu;
2741 insn_flags = sel->flags;
2743 /* Now check to see if the user has specified some command line
2744 switches that require certain abilities from the CPU. */
2745 sought = 0;
2747 if (TARGET_INTERWORK || TARGET_THUMB)
2749 sought |= (FL_THUMB | FL_MODE32);
2751 /* There are no ARM processors that support both APCS-26 and
2752 interworking. Therefore we force FL_MODE26 to be removed
2753 from insn_flags here (if it was set), so that the search
2754 below will always be able to find a compatible processor. */
2755 insn_flags &= ~FL_MODE26;
2758 if (sought != 0 && ((sought & insn_flags) != sought))
2760 /* Try to locate a CPU type that supports all of the abilities
2761 of the default CPU, plus the extra abilities requested by
2762 the user. */
2763 for (sel = all_cores; sel->name != NULL; sel++)
2764 if ((sel->flags & sought) == (sought | insn_flags))
2765 break;
2767 if (sel->name == NULL)
2769 unsigned current_bit_count = 0;
2770 const struct processors * best_fit = NULL;
2772 /* Ideally we would issue an error message here, saying
2773 that no CPU is both compatible with the default CPU
2774 and able to support the command line options specified
2775 by the programmer, and that they ought to use the
2776 -mcpu=<name> command line option to override the
2777 default CPU type.
2779 Instead, when no such cpu can be found, we scan the
2780 array again looking for a best match among the cores
2781 that do support the requested command line options,
2782 and fall back to that. */
2783 for (sel = all_cores; sel->name != NULL; sel++)
2784 if ((sel->flags & sought) == sought)
2786 unsigned count;
2788 count = bit_count (sel->flags & insn_flags);
2790 if (count >= current_bit_count)
2792 best_fit = sel;
2793 current_bit_count = count;
2797 gcc_assert (best_fit);
2798 sel = best_fit;
2801 arm_selected_cpu = sel;
2805 gcc_assert (arm_selected_cpu);
2806 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2807 if (!arm_selected_tune)
2808 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2810 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2811 insn_flags = arm_selected_cpu->flags;
2812 arm_base_arch = arm_selected_cpu->base_arch;
2814 arm_tune = arm_selected_tune->core;
2815 tune_flags = arm_selected_tune->flags;
2816 current_tune = arm_selected_tune->tune;
2818 /* Make sure that the processor choice does not conflict with any of the
2819 other command line choices. */
2820 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2821 error ("target CPU does not support ARM mode");
2823 /* BPABI targets use linker tricks to allow interworking on cores
2824 without thumb support. */
2825 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2827 warning (0, "target CPU does not support interworking" );
2828 target_flags &= ~MASK_INTERWORK;
2831 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2833 warning (0, "target CPU does not support THUMB instructions");
2834 target_flags &= ~MASK_THUMB;
2837 if (TARGET_APCS_FRAME && TARGET_THUMB)
2839 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2840 target_flags &= ~MASK_APCS_FRAME;
2843 /* Callee super interworking implies thumb interworking. Adding
2844 this to the flags here simplifies the logic elsewhere. */
2845 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2846 target_flags |= MASK_INTERWORK;
2848 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2849 from here where no function is being compiled currently. */
2850 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2851 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2853 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2854 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2856 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2858 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2859 target_flags |= MASK_APCS_FRAME;
2862 if (TARGET_POKE_FUNCTION_NAME)
2863 target_flags |= MASK_APCS_FRAME;
2865 if (TARGET_APCS_REENT && flag_pic)
2866 error ("-fpic and -mapcs-reent are incompatible");
2868 if (TARGET_APCS_REENT)
2869 warning (0, "APCS reentrant code not supported. Ignored");
2871 /* If this target is normally configured to use APCS frames, warn if they
2872 are turned off and debugging is turned on. */
2873 if (TARGET_ARM
2874 && write_symbols != NO_DEBUG
2875 && !TARGET_APCS_FRAME
2876 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2877 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879 if (TARGET_APCS_FLOAT)
2880 warning (0, "passing floating point arguments in fp regs not yet supported");
2882 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2883 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2884 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2885 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2886 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2887 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2888 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2889 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2890 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2891 arm_arch6m = arm_arch6 && !arm_arch_notm;
2892 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2893 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2894 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2895 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2896 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2898 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2899 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2900 thumb_code = TARGET_ARM == 0;
2901 thumb1_code = TARGET_THUMB1 != 0;
2902 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2903 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2904 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2905 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2906 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2907 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2908 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2909 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2910 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2911 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2912 if (arm_restrict_it == 2)
2913 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2915 if (!TARGET_THUMB2)
2916 arm_restrict_it = 0;
2918 /* If we are not using the default (ARM mode) section anchor offset
2919 ranges, then set the correct ranges now. */
2920 if (TARGET_THUMB1)
2922 /* Thumb-1 LDR instructions cannot have negative offsets.
2923 Permissible positive offset ranges are 5-bit (for byte loads),
2924 6-bit (for halfword loads), or 7-bit (for word loads).
2925 Empirical results suggest a 7-bit anchor range gives the best
2926 overall code size. */
2927 targetm.min_anchor_offset = 0;
2928 targetm.max_anchor_offset = 127;
2930 else if (TARGET_THUMB2)
2932 /* The minimum is set such that the total size of the block
2933 for a particular anchor is 248 + 1 + 4095 bytes, which is
2934 divisible by eight, ensuring natural spacing of anchors. */
2935 targetm.min_anchor_offset = -248;
2936 targetm.max_anchor_offset = 4095;
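/* Arithmetic check (illustrative): 248 + 1 + 4095 == 4344 == 8 * 543, so
   the anchor block size described above is indeed divisible by eight.  */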
2939 /* V5 code we generate is completely interworking capable, so we turn off
2940 TARGET_INTERWORK here to avoid many tests later on. */
2942 /* XXX However, we must pass the right pre-processor defines to CPP
2943 or GLD can get confused. This is a hack. */
2944 if (TARGET_INTERWORK)
2945 arm_cpp_interwork = 1;
2947 if (arm_arch5)
2948 target_flags &= ~MASK_INTERWORK;
2950 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2951 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2953 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2954 error ("iwmmxt abi requires an iwmmxt capable cpu");
2956 if (!global_options_set.x_arm_fpu_index)
2958 const char *target_fpu_name;
2959 bool ok;
2961 #ifdef FPUTYPE_DEFAULT
2962 target_fpu_name = FPUTYPE_DEFAULT;
2963 #else
2964 target_fpu_name = "vfp";
2965 #endif
2967 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2968 CL_TARGET);
2969 gcc_assert (ok);
2972 arm_fpu_desc = &all_fpus[arm_fpu_index];
2974 switch (arm_fpu_desc->model)
2976 case ARM_FP_MODEL_VFP:
2977 arm_fpu_attr = FPU_VFP;
2978 break;
2980 default:
2981 gcc_unreachable();
2984 if (TARGET_AAPCS_BASED)
2986 if (TARGET_CALLER_INTERWORKING)
2987 error ("AAPCS does not support -mcaller-super-interworking");
2988 else
2989 if (TARGET_CALLEE_INTERWORKING)
2990 error ("AAPCS does not support -mcallee-super-interworking");
2993 /* iWMMXt and NEON are incompatible. */
2994 if (TARGET_IWMMXT && TARGET_NEON)
2995 error ("iWMMXt and NEON are incompatible");
2997 /* iWMMXt unsupported under Thumb mode. */
2998 if (TARGET_THUMB && TARGET_IWMMXT)
2999 error ("iWMMXt unsupported under Thumb mode");
3001 /* __fp16 support currently assumes the core has ldrh. */
3002 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3003 sorry ("__fp16 and no ldrh");
3005 /* If soft-float is specified then don't use FPU. */
3006 if (TARGET_SOFT_FLOAT)
3007 arm_fpu_attr = FPU_NONE;
3009 if (TARGET_AAPCS_BASED)
3011 if (arm_abi == ARM_ABI_IWMMXT)
3012 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3013 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3014 && TARGET_HARD_FLOAT
3015 && TARGET_VFP)
3016 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3017 else
3018 arm_pcs_default = ARM_PCS_AAPCS;
3020 else
3022 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3023 sorry ("-mfloat-abi=hard and VFP");
3025 if (arm_abi == ARM_ABI_APCS)
3026 arm_pcs_default = ARM_PCS_APCS;
3027 else
3028 arm_pcs_default = ARM_PCS_ATPCS;
3031 /* For arm2/3 there is no need to do any scheduling if we are doing
3032 software floating-point. */
3033 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3034 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3036 /* Use the cp15 method if it is available. */
3037 if (target_thread_pointer == TP_AUTO)
3039 if (arm_arch6k && !TARGET_THUMB1)
3040 target_thread_pointer = TP_CP15;
3041 else
3042 target_thread_pointer = TP_SOFT;
3045 if (TARGET_HARD_TP && TARGET_THUMB1)
3046 error ("can not use -mtp=cp15 with 16-bit Thumb");
3048 /* Override the default structure alignment for AAPCS ABI. */
3049 if (!global_options_set.x_arm_structure_size_boundary)
3051 if (TARGET_AAPCS_BASED)
3052 arm_structure_size_boundary = 8;
3054 else
3056 if (arm_structure_size_boundary != 8
3057 && arm_structure_size_boundary != 32
3058 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3060 if (ARM_DOUBLEWORD_ALIGN)
3061 warning (0,
3062 "structure size boundary can only be set to 8, 32 or 64");
3063 else
3064 warning (0, "structure size boundary can only be set to 8 or 32");
3065 arm_structure_size_boundary
3066 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3070 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3072 error ("RTP PIC is incompatible with Thumb");
3073 flag_pic = 0;
3076 /* If stack checking is disabled, we can use r10 as the PIC register,
3077 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3078 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3080 if (TARGET_VXWORKS_RTP)
3081 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3082 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3085 if (flag_pic && TARGET_VXWORKS_RTP)
3086 arm_pic_register = 9;
3088 if (arm_pic_register_string != NULL)
3090 int pic_register = decode_reg_name (arm_pic_register_string);
3092 if (!flag_pic)
3093 warning (0, "-mpic-register= is useless without -fpic");
3095 /* Prevent the user from choosing an obviously stupid PIC register. */
3096 else if (pic_register < 0 || call_used_regs[pic_register]
3097 || pic_register == HARD_FRAME_POINTER_REGNUM
3098 || pic_register == STACK_POINTER_REGNUM
3099 || pic_register >= PC_REGNUM
3100 || (TARGET_VXWORKS_RTP
3101 && (unsigned int) pic_register != arm_pic_register))
3102 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3103 else
3104 arm_pic_register = pic_register;
3107 if (TARGET_VXWORKS_RTP
3108 && !global_options_set.x_arm_pic_data_is_text_relative)
3109 arm_pic_data_is_text_relative = 0;
3111 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3112 if (fix_cm3_ldrd == 2)
3114 if (arm_selected_cpu->core == cortexm3)
3115 fix_cm3_ldrd = 1;
3116 else
3117 fix_cm3_ldrd = 0;
3120 /* Enable -munaligned-access by default for
3121 - all ARMv6 architecture-based processors
3122 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3123 - ARMv8 architecture-based processors.
3125 Disable -munaligned-access by default for
3126 - all pre-ARMv6 architecture-based processors
3127 - ARMv6-M architecture-based processors. */
3129 if (unaligned_access == 2)
3131 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3132 unaligned_access = 1;
3133 else
3134 unaligned_access = 0;
3136 else if (unaligned_access == 1
3137 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3139 warning (0, "target CPU does not support unaligned accesses");
3140 unaligned_access = 0;
3143 if (TARGET_THUMB1 && flag_schedule_insns)
3145 /* Don't warn since it's on by default in -O2. */
3146 flag_schedule_insns = 0;
3149 if (optimize_size)
3151 /* If optimizing for size, bump the number of instructions that we
3152 are prepared to conditionally execute (even on a StrongARM). */
3153 max_insns_skipped = 6;
3155 /* For THUMB2, we limit the conditional sequence to one IT block. */
3156 if (TARGET_THUMB2)
3157 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3159 else
3160 max_insns_skipped = current_tune->max_insns_skipped;
3162 /* Hot/Cold partitioning is not currently supported, since we can't
3163 handle literal pool placement in that case. */
3164 if (flag_reorder_blocks_and_partition)
3166 inform (input_location,
3167 "-freorder-blocks-and-partition not supported on this architecture");
3168 flag_reorder_blocks_and_partition = 0;
3169 flag_reorder_blocks = 1;
3172 if (flag_pic)
3173 /* Hoisting PIC address calculations more aggressively provides a small,
3174 but measurable, size reduction for PIC code. Therefore, we decrease
3175 the bar for unrestricted expression hoisting to the cost of PIC address
3176 calculation, which is 2 instructions. */
3177 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3178 global_options.x_param_values,
3179 global_options_set.x_param_values);
3181 /* ARM EABI defaults to strict volatile bitfields. */
3182 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3183 && abi_version_at_least(2))
3184 flag_strict_volatile_bitfields = 1;
3186 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3187 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3188 if (flag_prefetch_loop_arrays < 0
3189 && HAVE_prefetch
3190 && optimize >= 3
3191 && current_tune->num_prefetch_slots > 0)
3192 flag_prefetch_loop_arrays = 1;
3194 /* Set up parameters to be used in prefetching algorithm. Do not override the
3195 defaults unless we are tuning for a core we have researched values for. */
3196 if (current_tune->num_prefetch_slots > 0)
3197 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3198 current_tune->num_prefetch_slots,
3199 global_options.x_param_values,
3200 global_options_set.x_param_values);
3201 if (current_tune->l1_cache_line_size >= 0)
3202 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3203 current_tune->l1_cache_line_size,
3204 global_options.x_param_values,
3205 global_options_set.x_param_values);
3206 if (current_tune->l1_cache_size >= 0)
3207 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3208 current_tune->l1_cache_size,
3209 global_options.x_param_values,
3210 global_options_set.x_param_values);
3212 /* Use Neon rather than core registers to perform 64-bit
3213 operations. */
3214 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3215 if (use_neon_for_64bits == 1)
3216 prefer_neon_for_64bits = true;
3218 /* Use the alternative scheduling-pressure algorithm by default. */
3219 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3220 global_options.x_param_values,
3221 global_options_set.x_param_values);
3223 /* Look through the ready list and all of the queue for instructions
3224 relevant to the L2 auto-prefetcher. */
3225 int param_sched_autopref_queue_depth;
3226 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3227 param_sched_autopref_queue_depth = -1;
3228 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3229 param_sched_autopref_queue_depth = 0;
3230 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3231 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3232 else
3233 gcc_unreachable ();
3234 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3235 param_sched_autopref_queue_depth,
3236 global_options.x_param_values,
3237 global_options_set.x_param_values);
3239 /* Disable shrink-wrap when optimizing function for size, since it tends to
3240 generate additional returns. */
3241 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3242 flag_shrink_wrap = false;
3243 /* TBD: Dwarf info for apcs frame is not handled yet. */
3244 if (TARGET_APCS_FRAME)
3245 flag_shrink_wrap = false;
3247 /* We only support -mslow-flash-data on armv7-m targets. */
3248 if (target_slow_flash_data
3249 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3250 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3251 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3253 /* Currently, for slow flash data, we just disable literal pools. */
3254 if (target_slow_flash_data)
3255 arm_disable_literal_pool = true;
3257 /* Thumb2 inline assembly code should always use unified syntax.
3258 This will apply to ARM and Thumb1 eventually. */
3259 if (TARGET_THUMB2)
3260 inline_asm_unified = 1;
3262 /* Disable scheduling fusion by default if it's not armv7 processor
3263 or doesn't prefer ldrd/strd. */
3264 if (flag_schedule_fusion == 2
3265 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3266 flag_schedule_fusion = 0;
3268 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3269 - epilogue_insns - does not accurately model the corresponding insns
3270 emitted in the asm file. In particular, see the comment in thumb_exit
3271 'Find out how many of the (return) argument registers we can corrupt'.
3272 As a consequence, the epilogue may clobber registers without fipa-ra
3273 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3274 TODO: Accurately model clobbers for epilogue_insns and reenable
3275 fipa-ra. */
3276 if (TARGET_THUMB1)
3277 flag_ipa_ra = 0;
3279 /* Register global variables with the garbage collector. */
3280 arm_add_gc_roots ();
3283 static void
3284 arm_add_gc_roots (void)
3286 gcc_obstack_init(&minipool_obstack);
3287 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3290 /* A table of known ARM exception types.
3291 For use with the interrupt function attribute. */
3293 typedef struct
3295 const char *const arg;
3296 const unsigned long return_value;
3298 isr_attribute_arg;
3300 static const isr_attribute_arg isr_attribute_args [] =
3302 { "IRQ", ARM_FT_ISR },
3303 { "irq", ARM_FT_ISR },
3304 { "FIQ", ARM_FT_FIQ },
3305 { "fiq", ARM_FT_FIQ },
3306 { "ABORT", ARM_FT_ISR },
3307 { "abort", ARM_FT_ISR },
3308 { "ABORT", ARM_FT_ISR },
3309 { "abort", ARM_FT_ISR },
3310 { "UNDEF", ARM_FT_EXCEPTION },
3311 { "undef", ARM_FT_EXCEPTION },
3312 { "SWI", ARM_FT_EXCEPTION },
3313 { "swi", ARM_FT_EXCEPTION },
3314 { NULL, ARM_FT_NORMAL }
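/* Usage example (illustrative, not from the original source): these strings
   are what users pass to the function attribute, e.g.

       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   which arm_isr_value below maps to ARM_FT_ISR.  */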
3317 /* Returns the (interrupt) function type of the current
3318 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3320 static unsigned long
3321 arm_isr_value (tree argument)
3323 const isr_attribute_arg * ptr;
3324 const char * arg;
3326 if (!arm_arch_notm)
3327 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3329 /* No argument - default to IRQ. */
3330 if (argument == NULL_TREE)
3331 return ARM_FT_ISR;
3333 /* Get the value of the argument. */
3334 if (TREE_VALUE (argument) == NULL_TREE
3335 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3336 return ARM_FT_UNKNOWN;
3338 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3340 /* Check it against the list of known arguments. */
3341 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3342 if (streq (arg, ptr->arg))
3343 return ptr->return_value;
3345 /* An unrecognized interrupt type. */
3346 return ARM_FT_UNKNOWN;
3349 /* Computes the type of the current function. */
3351 static unsigned long
3352 arm_compute_func_type (void)
3354 unsigned long type = ARM_FT_UNKNOWN;
3355 tree a;
3356 tree attr;
3358 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3360 /* Decide if the current function is volatile. Such functions
3361 never return, and many memory cycles can be saved by not storing
3362 register values that will never be needed again. This optimization
3363 was added to speed up context switching in a kernel application. */
3364 if (optimize > 0
3365 && (TREE_NOTHROW (current_function_decl)
3366 || !(flag_unwind_tables
3367 || (flag_exceptions
3368 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3369 && TREE_THIS_VOLATILE (current_function_decl))
3370 type |= ARM_FT_VOLATILE;
3372 if (cfun->static_chain_decl != NULL)
3373 type |= ARM_FT_NESTED;
3375 attr = DECL_ATTRIBUTES (current_function_decl);
3377 a = lookup_attribute ("naked", attr);
3378 if (a != NULL_TREE)
3379 type |= ARM_FT_NAKED;
3381 a = lookup_attribute ("isr", attr);
3382 if (a == NULL_TREE)
3383 a = lookup_attribute ("interrupt", attr);
3385 if (a == NULL_TREE)
3386 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3387 else
3388 type |= arm_isr_value (TREE_VALUE (a));
3390 return type;
3393 /* Returns the type of the current function. */
3395 unsigned long
3396 arm_current_func_type (void)
3398 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3399 cfun->machine->func_type = arm_compute_func_type ();
3401 return cfun->machine->func_type;
3404 bool
3405 arm_allocate_stack_slots_for_args (void)
3407 /* Naked functions should not allocate stack slots for arguments. */
3408 return !IS_NAKED (arm_current_func_type ());
3411 static bool
3412 arm_warn_func_return (tree decl)
3414 /* Naked functions are implemented entirely in assembly, including the
3415 return sequence, so suppress warnings about this. */
3416 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3420 /* Output assembler code for a block containing the constant parts
3421 of a trampoline, leaving space for the variable parts.
3423 On the ARM, (if r8 is the static chain regnum, and remembering that
3424 referencing pc adds an offset of 8) the trampoline looks like:
3425 ldr r8, [pc, #0]
3426 ldr pc, [pc]
3427 .word static chain value
3428 .word function's address
3429 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3431 static void
3432 arm_asm_trampoline_template (FILE *f)
3434 if (TARGET_ARM)
3436 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3437 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3439 else if (TARGET_THUMB2)
3441 /* The Thumb-2 trampoline is similar to the ARM implementation.
3442 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3443 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3444 STATIC_CHAIN_REGNUM, PC_REGNUM);
3445 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3447 else
3449 ASM_OUTPUT_ALIGN (f, 2);
3450 fprintf (f, "\t.code\t16\n");
3451 fprintf (f, ".Ltrampoline_start:\n");
3452 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3453 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3454 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3455 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3456 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3457 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3459 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3460 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3463 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3465 static void
3466 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3468 rtx fnaddr, mem, a_tramp;
3470 emit_block_move (m_tramp, assemble_trampoline_template (),
3471 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3473 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3474 emit_move_insn (mem, chain_value);
3476 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3477 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3478 emit_move_insn (mem, fnaddr);
3480 a_tramp = XEXP (m_tramp, 0);
3481 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3482 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3483 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
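/* Worked example (assumes TARGET_32BIT, so the chain and function address
   land at offsets 8 and 12 as in the code above): after arm_trampoline_init
   runs, the trampoline block is laid out roughly as

     offset  0:  ldr  STATIC_CHAIN, [pc, #0]   (ldr.w pair for Thumb-2)
     offset  4:  ldr  pc, [pc]
     offset  8:  <static chain value>          stored by the first move
     offset 12:  <target function address>     stored by the second move

   followed by the __clear_cache library call to keep the instruction
   cache coherent with the freshly written words.  */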
3486 /* Thumb trampolines should be entered in thumb mode, so set
3487 the bottom bit of the address. */
3489 static rtx
3490 arm_trampoline_adjust_address (rtx addr)
3492 if (TARGET_THUMB)
3493 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3494 NULL, 0, OPTAB_LIB_WIDEN);
3495 return addr;
3498 /* Return 1 if it is possible to return using a single instruction.
3499 If SIBLING is non-null, this is a test for a return before a sibling
3500 call. SIBLING is the call insn, so we can examine its register usage. */
3502 int
3503 use_return_insn (int iscond, rtx sibling)
3505 int regno;
3506 unsigned int func_type;
3507 unsigned long saved_int_regs;
3508 unsigned HOST_WIDE_INT stack_adjust;
3509 arm_stack_offsets *offsets;
3511 /* Never use a return instruction before reload has run. */
3512 if (!reload_completed)
3513 return 0;
3515 func_type = arm_current_func_type ();
3517 /* Naked, volatile and stack alignment functions need special
3518 consideration. */
3519 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3520 return 0;
3522 /* So do interrupt functions that use the frame pointer and Thumb
3523 interrupt functions. */
3524 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3525 return 0;
3527 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3528 && !optimize_function_for_size_p (cfun))
3529 return 0;
3531 offsets = arm_get_frame_offsets ();
3532 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3534 /* As do variadic functions. */
3535 if (crtl->args.pretend_args_size
3536 || cfun->machine->uses_anonymous_args
3537 /* Or if the function calls __builtin_eh_return () */
3538 || crtl->calls_eh_return
3539 /* Or if the function calls alloca */
3540 || cfun->calls_alloca
3541 /* Or if there is a stack adjustment. However, if the stack pointer
3542 is saved on the stack, we can use a pre-incrementing stack load. */
3543 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3544 && stack_adjust == 4)))
3545 return 0;
3547 saved_int_regs = offsets->saved_regs_mask;
3549 /* Unfortunately, the insn
3551 ldmib sp, {..., sp, ...}
3553 triggers a bug on most SA-110 based devices, such that the stack
3554 pointer won't be correctly restored if the instruction takes a
3555 page fault. We work around this problem by popping r3 along with
3556 the other registers, since that is never slower than executing
3557 another instruction.
3559 We test for !arm_arch5 here, because code for any architecture
3560 less than this could potentially be run on one of the buggy
3561 chips. */
3562 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3564 /* Validate that r3 is a call-clobbered register (always true in
3565 the default abi) ... */
3566 if (!call_used_regs[3])
3567 return 0;
3569 /* ... that it isn't being used for a return value ... */
3570 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3571 return 0;
3573 /* ... or for a tail-call argument ... */
3574 if (sibling)
3576 gcc_assert (CALL_P (sibling));
3578 if (find_regno_fusage (sibling, USE, 3))
3579 return 0;
3582 /* ... and that there are no call-saved registers in r0-r2
3583 (always true in the default ABI). */
3584 if (saved_int_regs & 0x7)
3585 return 0;
3588 /* Can't be done if interworking with Thumb, and any registers have been
3589 stacked. */
3590 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3591 return 0;
3593 /* On StrongARM, conditional returns are expensive if they aren't
3594 taken and multiple registers have been stacked. */
3595 if (iscond && arm_tune_strongarm)
3597 /* Conditional return when just the LR is stored is a simple
3598 conditional-load instruction, that's not expensive. */
3599 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3600 return 0;
3602 if (flag_pic
3603 && arm_pic_register != INVALID_REGNUM
3604 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3605 return 0;
3608 /* If there are saved registers but the LR isn't saved, then we need
3609 two instructions for the return. */
3610 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3611 return 0;
3613 /* Can't be done if any of the VFP regs are pushed,
3614 since this also requires an insn. */
3615 if (TARGET_HARD_FLOAT && TARGET_VFP)
3616 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3617 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3618 return 0;
3620 if (TARGET_REALLY_IWMMXT)
3621 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3622 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3623 return 0;
3625 return 1;
3628 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3629 shrink-wrapping if possible. This is the case if we need to emit a
3630 prologue, which we can test by looking at the offsets. */
3631 bool
3632 use_simple_return_p (void)
3634 arm_stack_offsets *offsets;
3636 offsets = arm_get_frame_offsets ();
3637 return offsets->outgoing_args != 0;
3640 /* Return TRUE if int I is a valid immediate ARM constant. */
3642 int
3643 const_ok_for_arm (HOST_WIDE_INT i)
3645 int lowbit;
3647 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3648 be all zero, or all one. */
3649 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3650 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3651 != ((~(unsigned HOST_WIDE_INT) 0)
3652 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3653 return FALSE;
3655 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3657 /* Fast return for 0 and small values. We must do this for zero, since
3658 the code below can't handle that one case. */
3659 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3660 return TRUE;
3662 /* Get the number of trailing zeros. */
3663 lowbit = ffs((int) i) - 1;
3665 /* Only even shifts are allowed in ARM mode so round down to the
3666 nearest even number. */
3667 if (TARGET_ARM)
3668 lowbit &= ~1;
3670 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3671 return TRUE;
3673 if (TARGET_ARM)
3675 /* Allow rotated constants in ARM mode. */
3676 if (lowbit <= 4
3677 && ((i & ~0xc000003f) == 0
3678 || (i & ~0xf000000f) == 0
3679 || (i & ~0xfc000003) == 0))
3680 return TRUE;
3682 else
3684 HOST_WIDE_INT v;
3686 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3687 v = i & 0xff;
3688 v |= v << 16;
3689 if (i == v || i == (v | (v << 8)))
3690 return TRUE;
3692 /* Allow repeated pattern 0xXY00XY00. */
3693 v = i & 0xff00;
3694 v |= v << 16;
3695 if (i == v)
3696 return TRUE;
3699 return FALSE;
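/* Worked examples for the rules above (illustrative, not exhaustive):

     0x000000FF  valid everywhere (fits in the low 8 bits).
     0x0000FF00  valid: 0xFF shifted left by 8, an even rotation in ARM mode.
     0x000001FE  invalid in ARM mode (would need an odd rotation of 0xFF),
                 but valid in Thumb-2, where any shift amount is allowed.
     0x00FF00FF  invalid in ARM mode, valid in Thumb-2 as the replicated
                 pattern 0x00XY00XY.
     0x12345678  invalid in both; arm_gen_constant below has to synthesize
                 it from several immediates.  */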
3702 /* Return true if I is a valid constant for the operation CODE. */
3703 int
3704 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3706 if (const_ok_for_arm (i))
3707 return 1;
3709 switch (code)
3711 case SET:
3712 /* See if we can use movw. */
3713 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3714 return 1;
3715 else
3716 /* Otherwise, try mvn. */
3717 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3719 case PLUS:
3720 /* See if we can use addw or subw. */
3721 if (TARGET_THUMB2
3722 && ((i & 0xfffff000) == 0
3723 || ((-i) & 0xfffff000) == 0))
3724 return 1;
3725 /* else fall through. */
3727 case COMPARE:
3728 case EQ:
3729 case NE:
3730 case GT:
3731 case LE:
3732 case LT:
3733 case GE:
3734 case GEU:
3735 case LTU:
3736 case GTU:
3737 case LEU:
3738 case UNORDERED:
3739 case ORDERED:
3740 case UNEQ:
3741 case UNGE:
3742 case UNLT:
3743 case UNGT:
3744 case UNLE:
3745 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3747 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3748 case XOR:
3749 return 0;
3751 case IOR:
3752 if (TARGET_THUMB2)
3753 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3754 return 0;
3756 case AND:
3757 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3759 default:
3760 gcc_unreachable ();
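/* Illustrative consequences of the cases above (the Thumb-2 examples
   assume arm_arch_thumb2 / TARGET_THUMB2):

     SET  0x00001234  ->  OK via movw, since the high 16 bits are zero.
     PLUS 0x00000FFF  ->  OK via addw/subw, since it fits in 12 bits.
     AND  0xFFFFFF00  ->  OK everywhere: ~0xFFFFFF00 == 0xFF is a valid
                          immediate, so the operation can use bic.
     IOR  0xFFFFFF00  ->  OK on Thumb-2 only, where orn is available.  */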
3764 /* Return true if I is a valid di mode constant for the operation CODE. */
3765 int
3766 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3768 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3769 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3770 rtx hi = GEN_INT (hi_val);
3771 rtx lo = GEN_INT (lo_val);
3773 if (TARGET_THUMB1)
3774 return 0;
3776 switch (code)
3778 case AND:
3779 case IOR:
3780 case XOR:
3781 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3782 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3783 case PLUS:
3784 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3786 default:
3787 return 0;
3791 /* Emit a sequence of insns to handle a large constant.
3792 CODE is the code of the operation required, it can be any of SET, PLUS,
3793 IOR, AND, XOR, MINUS;
3794 MODE is the mode in which the operation is being performed;
3795 VAL is the integer to operate on;
3796 SOURCE is the other operand (a register, or a null-pointer for SET);
3797 SUBTARGETS means it is safe to create scratch registers if that will
3798 either produce a simpler sequence, or we will want to cse the values.
3799 Return value is the number of insns emitted. */
3801 /* ??? Tweak this for thumb2. */
3802 int
3803 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3804 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3806 rtx cond;
3808 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3809 cond = COND_EXEC_TEST (PATTERN (insn));
3810 else
3811 cond = NULL_RTX;
3813 if (subtargets || code == SET
3814 || (REG_P (target) && REG_P (source)
3815 && REGNO (target) != REGNO (source)))
3817 /* After arm_reorg has been called, we can't fix up expensive
3818 constants by pushing them into memory so we must synthesize
3819 them in-line, regardless of the cost. This is only likely to
3820 be more costly on chips that have load delay slots and we are
3821 compiling without running the scheduler (so no splitting
3822 occurred before the final instruction emission).
3824 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3826 if (!cfun->machine->after_arm_reorg
3827 && !cond
3828 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3829 1, 0)
3830 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3831 + (code != SET))))
3833 if (code == SET)
3835 /* Currently SET is the only monadic value for CODE, all
3836 the rest are dyadic. */
3837 if (TARGET_USE_MOVT)
3838 arm_emit_movpair (target, GEN_INT (val));
3839 else
3840 emit_set_insn (target, GEN_INT (val));
3842 return 1;
3844 else
3846 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3848 if (TARGET_USE_MOVT)
3849 arm_emit_movpair (temp, GEN_INT (val));
3850 else
3851 emit_set_insn (temp, GEN_INT (val));
3853 /* For MINUS, the value is subtracted from, since we never
3854 have subtraction of a constant. */
3855 if (code == MINUS)
3856 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3857 else
3858 emit_set_insn (target,
3859 gen_rtx_fmt_ee (code, mode, source, temp));
3860 return 2;
3865 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3866 1);
3869 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3870 ARM/THUMB2 immediates, and add up to VAL.
3871 The function's return value gives the number of insns required. */
3872 static int
3873 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3874 struct four_ints *return_sequence)
3876 int best_consecutive_zeros = 0;
3877 int i;
3878 int best_start = 0;
3879 int insns1, insns2;
3880 struct four_ints tmp_sequence;
3882 /* If we aren't targeting ARM, the best place to start is always at
3883 the bottom, otherwise look more closely. */
3884 if (TARGET_ARM)
3886 for (i = 0; i < 32; i += 2)
3888 int consecutive_zeros = 0;
3890 if (!(val & (3 << i)))
3892 while ((i < 32) && !(val & (3 << i)))
3894 consecutive_zeros += 2;
3895 i += 2;
3897 if (consecutive_zeros > best_consecutive_zeros)
3899 best_consecutive_zeros = consecutive_zeros;
3900 best_start = i - consecutive_zeros;
3902 i -= 2;
3907 /* So long as it won't require any more insns to do so, it's
3908 desirable to emit a small constant (in bits 0...9) in the last
3909 insn. This way there is more chance that it can be combined with
3910 a later addressing insn to form a pre-indexed load or store
3911 operation. Consider:
3913 *((volatile int *)0xe0000100) = 1;
3914 *((volatile int *)0xe0000110) = 2;
3916 We want this to wind up as:
3918 mov rA, #0xe0000000
3919 mov rB, #1
3920 str rB, [rA, #0x100]
3921 mov rB, #2
3922 str rB, [rA, #0x110]
3924 rather than having to synthesize both large constants from scratch.
3926 Therefore, we calculate how many insns would be required to emit
3927 the constant starting from `best_start', and also starting from
3928 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3929 yield a shorter sequence, we may as well use zero. */
3930 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3931 if (best_start != 0
3932 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3934 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3935 if (insns2 <= insns1)
3937 *return_sequence = tmp_sequence;
3938 insns1 = insns2;
3942 return insns1;
3945 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3946 static int
3947 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3948 struct four_ints *return_sequence, int i)
3950 int remainder = val & 0xffffffff;
3951 int insns = 0;
3953 /* Try and find a way of doing the job in either two or three
3954 instructions.
3956 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3957 location. We start at position I. This may be the MSB, or
3958 optimal_immediate_sequence may have positioned it at the largest block
3959 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3960 wrapping around to the top of the word when we drop off the bottom.
3961 In the worst case this code should produce no more than four insns.
3963 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3964 constants, shifted to any arbitrary location. We should always start
3965 at the MSB. */
3966 do
3968 int end;
3969 unsigned int b1, b2, b3, b4;
3970 unsigned HOST_WIDE_INT result;
3971 int loc;
3973 gcc_assert (insns < 4);
3975 if (i <= 0)
3976 i += 32;
3978 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3979 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3981 loc = i;
3982 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3983 /* We can use addw/subw for the last 12 bits. */
3984 result = remainder;
3985 else
3987 /* Use an 8-bit shifted/rotated immediate. */
3988 end = i - 8;
3989 if (end < 0)
3990 end += 32;
3991 result = remainder & ((0x0ff << end)
3992 | ((i < end) ? (0xff >> (32 - end))
3993 : 0));
3994 i -= 8;
3997 else
3999 /* ARM allows rotates by a multiple of two. Thumb-2 allows
4000 arbitrary shifts. */
4001 i -= TARGET_ARM ? 2 : 1;
4002 continue;
4005 /* Next, see if we can do a better job with a thumb2 replicated
4006 constant.
4008 We do it this way around to catch the cases like 0x01F001E0 where
4009 two 8-bit immediates would work, but a replicated constant would
4010 make it worse.
4012 TODO: 16-bit constants that don't clear all the bits, but still win.
4013 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4014 if (TARGET_THUMB2)
4016 b1 = (remainder & 0xff000000) >> 24;
4017 b2 = (remainder & 0x00ff0000) >> 16;
4018 b3 = (remainder & 0x0000ff00) >> 8;
4019 b4 = remainder & 0xff;
4021 if (loc > 24)
4023 /* The 8-bit immediate already found clears b1 (and maybe b2),
4024 but must leave b3 and b4 alone. */
4026 /* First try to find a 32-bit replicated constant that clears
4027 almost everything. We can assume that we can't do it in one,
4028 or else we wouldn't be here. */
4029 unsigned int tmp = b1 & b2 & b3 & b4;
4030 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4031 + (tmp << 24);
4032 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4033 + (tmp == b3) + (tmp == b4);
4034 if (tmp
4035 && (matching_bytes >= 3
4036 || (matching_bytes == 2
4037 && const_ok_for_op (remainder & ~tmp2, code))))
4039 /* At least 3 of the bytes match, and the fourth has at
4040 least as many bits set, or two of the bytes match
4041 and it will only require one more insn to finish. */
4042 result = tmp2;
4043 i = tmp != b1 ? 32
4044 : tmp != b2 ? 24
4045 : tmp != b3 ? 16
4046 : 8;
4049 /* Second, try to find a 16-bit replicated constant that can
4050 leave three of the bytes clear. If b2 or b4 is already
4051 zero, then we can. If the 8-bit from above would not
4052 clear b2 anyway, then we still win. */
4053 else if (b1 == b3 && (!b2 || !b4
4054 || (remainder & 0x00ff0000 & ~result)))
4056 result = remainder & 0xff00ff00;
4057 i = 24;
4060 else if (loc > 16)
4062 /* The 8-bit immediate already found clears b2 (and maybe b3)
4063 and we don't get here unless b1 is already clear, but it will
4064 leave b4 unchanged. */
4066 /* If we can clear b2 and b4 at once, then we win, since the
4067 8-bits couldn't possibly reach that far. */
4068 if (b2 == b4)
4070 result = remainder & 0x00ff00ff;
4071 i = 16;
4076 return_sequence->i[insns++] = result;
4077 remainder &= ~result;
4079 if (code == SET || code == MINUS)
4080 code = PLUS;
4082 while (remainder);
4084 return insns;
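/* Worked example for the 0x01F001E0 case mentioned in the comment above:
   the two halves 0x01F00000 and 0x000001E0 are each a single shifted 8-bit
   immediate, so the plain two-immediate split wins.  Clearing a 16-bit
   replicated constant first (e.g. 0x01000100) would leave 0x00F000E0,
   which still needs two more immediates, for three instructions in
   total.  */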
4087 /* Emit an instruction with the indicated PATTERN. If COND is
4088 non-NULL, conditionalize the execution of the instruction on COND
4089 being true. */
4091 static void
4092 emit_constant_insn (rtx cond, rtx pattern)
4094 if (cond)
4095 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4096 emit_insn (pattern);
4099 /* As above, but extra parameter GENERATE which, if clear, suppresses
4100 RTL generation. */
4102 static int
4103 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4104 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4105 int generate)
4107 int can_invert = 0;
4108 int can_negate = 0;
4109 int final_invert = 0;
4110 int i;
4111 int set_sign_bit_copies = 0;
4112 int clear_sign_bit_copies = 0;
4113 int clear_zero_bit_copies = 0;
4114 int set_zero_bit_copies = 0;
4115 int insns = 0, neg_insns, inv_insns;
4116 unsigned HOST_WIDE_INT temp1, temp2;
4117 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4118 struct four_ints *immediates;
4119 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4121 /* Find out which operations are safe for a given CODE. Also do a quick
4122 check for degenerate cases; these can occur when DImode operations
4123 are split. */
4124 switch (code)
4126 case SET:
4127 can_invert = 1;
4128 break;
4130 case PLUS:
4131 can_negate = 1;
4132 break;
4134 case IOR:
4135 if (remainder == 0xffffffff)
4137 if (generate)
4138 emit_constant_insn (cond,
4139 gen_rtx_SET (target,
4140 GEN_INT (ARM_SIGN_EXTEND (val))));
4141 return 1;
4144 if (remainder == 0)
4146 if (reload_completed && rtx_equal_p (target, source))
4147 return 0;
4149 if (generate)
4150 emit_constant_insn (cond, gen_rtx_SET (target, source));
4151 return 1;
4153 break;
4155 case AND:
4156 if (remainder == 0)
4158 if (generate)
4159 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4160 return 1;
4162 if (remainder == 0xffffffff)
4164 if (reload_completed && rtx_equal_p (target, source))
4165 return 0;
4166 if (generate)
4167 emit_constant_insn (cond, gen_rtx_SET (target, source));
4168 return 1;
4170 can_invert = 1;
4171 break;
4173 case XOR:
4174 if (remainder == 0)
4176 if (reload_completed && rtx_equal_p (target, source))
4177 return 0;
4178 if (generate)
4179 emit_constant_insn (cond, gen_rtx_SET (target, source));
4180 return 1;
4183 if (remainder == 0xffffffff)
4185 if (generate)
4186 emit_constant_insn (cond,
4187 gen_rtx_SET (target,
4188 gen_rtx_NOT (mode, source)));
4189 return 1;
4191 final_invert = 1;
4192 break;
4194 case MINUS:
4195 /* We treat MINUS as (val - source), since (source - val) is always
4196 passed as (source + (-val)). */
4197 if (remainder == 0)
4199 if (generate)
4200 emit_constant_insn (cond,
4201 gen_rtx_SET (target,
4202 gen_rtx_NEG (mode, source)));
4203 return 1;
4205 if (const_ok_for_arm (val))
4207 if (generate)
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (target,
4210 gen_rtx_MINUS (mode, GEN_INT (val),
4211 source)));
4212 return 1;
4215 break;
4217 default:
4218 gcc_unreachable ();
4221 /* If we can do it in one insn get out quickly. */
4222 if (const_ok_for_op (val, code))
4224 if (generate)
4225 emit_constant_insn (cond,
4226 gen_rtx_SET (target,
4227 (source
4228 ? gen_rtx_fmt_ee (code, mode, source,
4229 GEN_INT (val))
4230 : GEN_INT (val))));
4231 return 1;
4234 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4235 insn. */
4236 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4237 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4239 if (generate)
4241 if (mode == SImode && i == 16)
4242 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4243 smaller insn. */
4244 emit_constant_insn (cond,
4245 gen_zero_extendhisi2
4246 (target, gen_lowpart (HImode, source)));
4247 else
4248 /* Extz only supports SImode, but we can coerce the operands
4249 into that mode. */
4250 emit_constant_insn (cond,
4251 gen_extzv_t2 (gen_lowpart (SImode, target),
4252 gen_lowpart (SImode, source),
4253 GEN_INT (i), const0_rtx));
4256 return 1;
4259 /* Calculate a few attributes that may be useful for specific
4260 optimizations. */
4261 /* Count number of leading zeros. */
4262 for (i = 31; i >= 0; i--)
4264 if ((remainder & (1 << i)) == 0)
4265 clear_sign_bit_copies++;
4266 else
4267 break;
4270 /* Count number of leading 1's. */
4271 for (i = 31; i >= 0; i--)
4273 if ((remainder & (1 << i)) != 0)
4274 set_sign_bit_copies++;
4275 else
4276 break;
4279 /* Count number of trailing zero's. */
4280 for (i = 0; i <= 31; i++)
4282 if ((remainder & (1 << i)) == 0)
4283 clear_zero_bit_copies++;
4284 else
4285 break;
4288 /* Count number of trailing 1's. */
4289 for (i = 0; i <= 31; i++)
4291 if ((remainder & (1 << i)) != 0)
4292 set_zero_bit_copies++;
4293 else
4294 break;
4297 switch (code)
4299 case SET:
4300 /* See if we can do this by sign_extending a constant that is known
4301 to be negative. This is a good way of doing it, since the shift
4302 may well merge into a subsequent insn. */
4303 if (set_sign_bit_copies > 1)
4305 if (const_ok_for_arm
4306 (temp1 = ARM_SIGN_EXTEND (remainder
4307 << (set_sign_bit_copies - 1))))
4309 if (generate)
4311 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4312 emit_constant_insn (cond,
4313 gen_rtx_SET (new_src, GEN_INT (temp1)));
4314 emit_constant_insn (cond,
4315 gen_ashrsi3 (target, new_src,
4316 GEN_INT (set_sign_bit_copies - 1)));
4318 return 2;
4320 /* For an inverted constant, we will need to set the low bits,
4321 these will be shifted out of harm's way. */
4322 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4323 if (const_ok_for_arm (~temp1))
4325 if (generate)
4327 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4328 emit_constant_insn (cond,
4329 gen_rtx_SET (new_src, GEN_INT (temp1)));
4330 emit_constant_insn (cond,
4331 gen_ashrsi3 (target, new_src,
4332 GEN_INT (set_sign_bit_copies - 1)));
4334 return 2;
4338 /* See if we can calculate the value as the difference between two
4339 valid immediates. */
4340 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4342 int topshift = clear_sign_bit_copies & ~1;
4344 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4345 & (0xff000000 >> topshift));
4347 /* If temp1 is zero, then that means the 9 most significant
4348 bits of remainder were 1 and we've caused it to overflow.
4349 When topshift is 0 we don't need to do anything since we
4350 can borrow from 'bit 32'. */
4351 if (temp1 == 0 && topshift != 0)
4352 temp1 = 0x80000000 >> (topshift - 1);
4354 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4356 if (const_ok_for_arm (temp2))
4358 if (generate)
4360 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4361 emit_constant_insn (cond,
4362 gen_rtx_SET (new_src, GEN_INT (temp1)));
4363 emit_constant_insn (cond,
4364 gen_addsi3 (target, new_src,
4365 GEN_INT (-temp2)));
4368 return 2;
4372 /* See if we can generate this by setting the bottom (or the top)
4373 16 bits, and then shifting these into the other half of the
4374 word. We only look for the simplest cases, to do more would cost
4375 too much. Be careful, however, not to generate this when the
4376 alternative would take fewer insns. */
4377 if (val & 0xffff0000)
4379 temp1 = remainder & 0xffff0000;
4380 temp2 = remainder & 0x0000ffff;
4382 /* Overlaps outside this range are best done using other methods. */
4383 for (i = 9; i < 24; i++)
4385 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4386 && !const_ok_for_arm (temp2))
4388 rtx new_src = (subtargets
4389 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4390 : target);
4391 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4392 source, subtargets, generate);
4393 source = new_src;
4394 if (generate)
4395 emit_constant_insn
4396 (cond,
4397 gen_rtx_SET
4398 (target,
4399 gen_rtx_IOR (mode,
4400 gen_rtx_ASHIFT (mode, source,
4401 GEN_INT (i)),
4402 source)));
4403 return insns + 1;
4407 /* Don't duplicate cases already considered. */
4408 for (i = 17; i < 24; i++)
4410 if (((temp1 | (temp1 >> i)) == remainder)
4411 && !const_ok_for_arm (temp1))
4413 rtx new_src = (subtargets
4414 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4415 : target);
4416 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4417 source, subtargets, generate);
4418 source = new_src;
4419 if (generate)
4420 emit_constant_insn
4421 (cond,
4422 gen_rtx_SET (target,
4423 gen_rtx_IOR
4424 (mode,
4425 gen_rtx_LSHIFTRT (mode, source,
4426 GEN_INT (i)),
4427 source)));
4428 return insns + 1;
4432 break;
4434 case IOR:
4435 case XOR:
4436 /* If we have IOR or XOR, and the constant can be loaded in a
4437 single instruction, and we can find a temporary to put it in,
4438 then this can be done in two instructions instead of 3-4. */
4439 if (subtargets
4440 /* TARGET can't be NULL if SUBTARGETS is 0 */
4441 || (reload_completed && !reg_mentioned_p (target, source)))
4443 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4445 if (generate)
4447 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4449 emit_constant_insn (cond,
4450 gen_rtx_SET (sub, GEN_INT (val)));
4451 emit_constant_insn (cond,
4452 gen_rtx_SET (target,
4453 gen_rtx_fmt_ee (code, mode,
4454 source, sub)));
4456 return 2;
4460 if (code == XOR)
4461 break;
4463 /* Convert.
4464 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4465 followed by 0s, e.g. 0xfff00000)
4466 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4468 This can be done in 2 instructions by using shifts with mov or mvn.
4469 e.g. for
4470 x = x | 0xfff00000;
4471 we generate.
4472 mvn r0, r0, asl #12
4473 mvn r0, r0, lsr #12 */
4474 if (set_sign_bit_copies > 8
4475 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4477 if (generate)
4479 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (set_sign_bit_copies);
4482 emit_constant_insn
4483 (cond,
4484 gen_rtx_SET (sub,
4485 gen_rtx_NOT (mode,
4486 gen_rtx_ASHIFT (mode,
4487 source,
4488 shift))));
4489 emit_constant_insn
4490 (cond,
4491 gen_rtx_SET (target,
4492 gen_rtx_NOT (mode,
4493 gen_rtx_LSHIFTRT (mode, sub,
4494 shift))));
4496 return 2;
4499 /* Convert
4500 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4502 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4504 For example, r0 = r0 | 0xfff
4505 mvn r0, r0, lsr #12
4506 mvn r0, r0, asl #12
4509 if (set_zero_bit_copies > 8
4510 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4512 if (generate)
4514 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4515 rtx shift = GEN_INT (set_zero_bit_copies);
4517 emit_constant_insn
4518 (cond,
4519 gen_rtx_SET (sub,
4520 gen_rtx_NOT (mode,
4521 gen_rtx_LSHIFTRT (mode,
4522 source,
4523 shift))));
4524 emit_constant_insn
4525 (cond,
4526 gen_rtx_SET (target,
4527 gen_rtx_NOT (mode,
4528 gen_rtx_ASHIFT (mode, sub,
4529 shift))));
4531 return 2;
4534 /* This will never be reached for Thumb2 because orn is a valid
4535 instruction. This is for Thumb1 and the ARM 32 bit cases.
4537 x = y | constant (such that ~constant is a valid constant)
4538 Transform this to
4539 x = ~(~y & ~constant).
4541 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4543 if (generate)
4545 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4546 emit_constant_insn (cond,
4547 gen_rtx_SET (sub,
4548 gen_rtx_NOT (mode, source)));
4549 source = sub;
4550 if (subtargets)
4551 sub = gen_reg_rtx (mode);
4552 emit_constant_insn (cond,
4553 gen_rtx_SET (sub,
4554 gen_rtx_AND (mode, source,
4555 GEN_INT (temp1))));
4556 emit_constant_insn (cond,
4557 gen_rtx_SET (target,
4558 gen_rtx_NOT (mode, sub)));
4560 return 3;
4562 break;
4564 case AND:
4565 /* See if two shifts will do 2 or more insn's worth of work. */
4566 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4568 HOST_WIDE_INT shift_mask = ((0xffffffff
4569 << (32 - clear_sign_bit_copies))
4570 & 0xffffffff);
4572 if ((remainder | shift_mask) != 0xffffffff)
4574 HOST_WIDE_INT new_val
4575 = ARM_SIGN_EXTEND (remainder | shift_mask);
4577 if (generate)
4579 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4580 insns = arm_gen_constant (AND, SImode, cond, new_val,
4581 new_src, source, subtargets, 1);
4582 source = new_src;
4584 else
4586 rtx targ = subtargets ? NULL_RTX : target;
4587 insns = arm_gen_constant (AND, mode, cond, new_val,
4588 targ, source, subtargets, 0);
4592 if (generate)
4594 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4595 rtx shift = GEN_INT (clear_sign_bit_copies);
4597 emit_insn (gen_ashlsi3 (new_src, source, shift));
4598 emit_insn (gen_lshrsi3 (target, new_src, shift));
4601 return insns + 2;
4604 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4606 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4608 if ((remainder | shift_mask) != 0xffffffff)
4610 HOST_WIDE_INT new_val
4611 = ARM_SIGN_EXTEND (remainder | shift_mask);
4612 if (generate)
4614 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4616 insns = arm_gen_constant (AND, mode, cond, new_val,
4617 new_src, source, subtargets, 1);
4618 source = new_src;
4620 else
4622 rtx targ = subtargets ? NULL_RTX : target;
4624 insns = arm_gen_constant (AND, mode, cond, new_val,
4625 targ, source, subtargets, 0);
4629 if (generate)
4631 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4632 rtx shift = GEN_INT (clear_zero_bit_copies);
4634 emit_insn (gen_lshrsi3 (new_src, source, shift));
4635 emit_insn (gen_ashlsi3 (target, new_src, shift));
4638 return insns + 2;
4641 break;
4643 default:
4644 break;
4647 /* Calculate what the instruction sequences would be if we generated it
4648 normally, negated, or inverted. */
4649 if (code == AND)
4650 /* AND cannot be split into multiple insns, so invert and use BIC. */
4651 insns = 99;
4652 else
4653 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4655 if (can_negate)
4656 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4657 &neg_immediates);
4658 else
4659 neg_insns = 99;
4661 if (can_invert || final_invert)
4662 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4663 &inv_immediates);
4664 else
4665 inv_insns = 99;
4667 immediates = &pos_immediates;
4669 /* Is the negated immediate sequence more efficient? */
4670 if (neg_insns < insns && neg_insns <= inv_insns)
4672 insns = neg_insns;
4673 immediates = &neg_immediates;
4675 else
4676 can_negate = 0;
4678 /* Is the inverted immediate sequence more efficient?
4679 We must allow for an extra NOT instruction for XOR operations, although
4680 there is some chance that the final 'mvn' will get optimized later. */
4681 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4683 insns = inv_insns;
4684 immediates = &inv_immediates;
4686 else
4688 can_invert = 0;
4689 final_invert = 0;
4692 /* Now output the chosen sequence as instructions. */
4693 if (generate)
4695 for (i = 0; i < insns; i++)
4697 rtx new_src, temp1_rtx;
4699 temp1 = immediates->i[i];
4701 if (code == SET || code == MINUS)
4702 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4703 else if ((final_invert || i < (insns - 1)) && subtargets)
4704 new_src = gen_reg_rtx (mode);
4705 else
4706 new_src = target;
4708 if (can_invert)
4709 temp1 = ~temp1;
4710 else if (can_negate)
4711 temp1 = -temp1;
4713 temp1 = trunc_int_for_mode (temp1, mode);
4714 temp1_rtx = GEN_INT (temp1);
4716 if (code == SET)
4718 else if (code == MINUS)
4719 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4720 else
4721 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4723 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4724 source = new_src;
4726 if (code == SET)
4728 can_negate = can_invert;
4729 can_invert = 0;
4730 code = PLUS;
4732 else if (code == MINUS)
4733 code = PLUS;
4737 if (final_invert)
4739 if (generate)
4740 emit_constant_insn (cond, gen_rtx_SET (target,
4741 gen_rtx_NOT (mode, source)));
4742 insns++;
4745 return insns;
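/* Illustrative synthesis (the register name rD is hypothetical): a SET of
   0x12345678 has no single-instruction encoding, so one possible sequence
   emitted by the loop above is

     mov  rD, #0x12000000
     add  rD, rD, #0x00340000
     add  rD, rD, #0x00005600
     add  rD, rD, #0x00000078

   i.e. four insns here, which is also the return value.  The exact chunks
   come from optimal_immediate_sequence and may be grouped differently.  */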
4748 /* Canonicalize a comparison so that we are more likely to recognize it.
4749 This can be done for a few constant compares, where we can make the
4750 immediate value easier to load. */
4752 static void
4753 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4754 bool op0_preserve_value)
4756 machine_mode mode;
4757 unsigned HOST_WIDE_INT i, maxval;
4759 mode = GET_MODE (*op0);
4760 if (mode == VOIDmode)
4761 mode = GET_MODE (*op1);
4763 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4765 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4766 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4767 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4768 for GTU/LEU in Thumb mode. */
4769 if (mode == DImode)
4772 if (*code == GT || *code == LE
4773 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4775 /* Missing comparison. First try to use an available
4776 comparison. */
4777 if (CONST_INT_P (*op1))
4779 i = INTVAL (*op1);
4780 switch (*code)
4782 case GT:
4783 case LE:
4784 if (i != maxval
4785 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4787 *op1 = GEN_INT (i + 1);
4788 *code = *code == GT ? GE : LT;
4789 return;
4791 break;
4792 case GTU:
4793 case LEU:
4794 if (i != ~((unsigned HOST_WIDE_INT) 0)
4795 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4797 *op1 = GEN_INT (i + 1);
4798 *code = *code == GTU ? GEU : LTU;
4799 return;
4801 break;
4802 default:
4803 gcc_unreachable ();
4807 /* If that did not work, reverse the condition. */
4808 if (!op0_preserve_value)
4810 std::swap (*op0, *op1);
4811 *code = (int)swap_condition ((enum rtx_code)*code);
4814 return;
4817 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4818 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4819 to facilitate possible combining with a cmp into 'ands'. */
4820 if (mode == SImode
4821 && GET_CODE (*op0) == ZERO_EXTEND
4822 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4823 && GET_MODE (XEXP (*op0, 0)) == QImode
4824 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4825 && subreg_lowpart_p (XEXP (*op0, 0))
4826 && *op1 == const0_rtx)
4827 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4828 GEN_INT (255));
4830 /* Comparisons smaller than DImode. Only adjust comparisons against
4831 an out-of-range constant. */
4832 if (!CONST_INT_P (*op1)
4833 || const_ok_for_arm (INTVAL (*op1))
4834 || const_ok_for_arm (- INTVAL (*op1)))
4835 return;
4837 i = INTVAL (*op1);
4839 switch (*code)
4841 case EQ:
4842 case NE:
4843 return;
4845 case GT:
4846 case LE:
4847 if (i != maxval
4848 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4850 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4851 *code = *code == GT ? GE : LT;
4852 return;
4854 break;
4856 case GE:
4857 case LT:
4858 if (i != ~maxval
4859 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4861 *op1 = GEN_INT (i - 1);
4862 *code = *code == GE ? GT : LE;
4863 return;
4865 break;
4867 case GTU:
4868 case LEU:
4869 if (i != ~((unsigned HOST_WIDE_INT) 0)
4870 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4872 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4873 *code = *code == GTU ? GEU : LTU;
4874 return;
4876 break;
4878 case GEU:
4879 case LTU:
4880 if (i != 0
4881 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4883 *op1 = GEN_INT (i - 1);
4884 *code = *code == GEU ? GTU : LEU;
4885 return;
4887 break;
4889 default:
4890 gcc_unreachable ();
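/* Worked example of the adjustment above: for an SImode "x <= 0xFFF",
   the constant 0xFFF is not a valid immediate (and neither is -0xFFF),
   but 0x1000 is, so LE is rewritten as LT with *op1 = 0x1000; the
   equivalent comparison "x < 4096" then needs no constant synthesis.  */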
4895 /* Define how to find the value returned by a function. */
4897 static rtx
4898 arm_function_value(const_tree type, const_tree func,
4899 bool outgoing ATTRIBUTE_UNUSED)
4901 machine_mode mode;
4902 int unsignedp ATTRIBUTE_UNUSED;
4903 rtx r ATTRIBUTE_UNUSED;
4905 mode = TYPE_MODE (type);
4907 if (TARGET_AAPCS_BASED)
4908 return aapcs_allocate_return_reg (mode, type, func);
4910 /* Promote integer types. */
4911 if (INTEGRAL_TYPE_P (type))
4912 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4914 /* Promote small structs returned in a register to full-word size
4915 for big-endian AAPCS. */
4916 if (arm_return_in_msb (type))
4918 HOST_WIDE_INT size = int_size_in_bytes (type);
4919 if (size % UNITS_PER_WORD != 0)
4921 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4922 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4926 return arm_libcall_value_1 (mode);
4929 /* libcall hashtable helpers. */
4931 struct libcall_hasher : typed_noop_remove <rtx_def>
4933 typedef const rtx_def *value_type;
4934 typedef const rtx_def *compare_type;
4935 static inline hashval_t hash (const rtx_def *);
4936 static inline bool equal (const rtx_def *, const rtx_def *);
4937 static inline void remove (rtx_def *);
4940 inline bool
4941 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
4943 return rtx_equal_p (p1, p2);
4946 inline hashval_t
4947 libcall_hasher::hash (const rtx_def *p1)
4949 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4952 typedef hash_table<libcall_hasher> libcall_table_type;
4954 static void
4955 add_libcall (libcall_table_type *htab, rtx libcall)
4957 *htab->find_slot (libcall, INSERT) = libcall;
4960 static bool
4961 arm_libcall_uses_aapcs_base (const_rtx libcall)
4963 static bool init_done = false;
4964 static libcall_table_type *libcall_htab = NULL;
4966 if (!init_done)
4968 init_done = true;
4970 libcall_htab = new libcall_table_type (31);
4971 add_libcall (libcall_htab,
4972 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4973 add_libcall (libcall_htab,
4974 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4975 add_libcall (libcall_htab,
4976 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4977 add_libcall (libcall_htab,
4978 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4980 add_libcall (libcall_htab,
4981 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4982 add_libcall (libcall_htab,
4983 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4984 add_libcall (libcall_htab,
4985 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4986 add_libcall (libcall_htab,
4987 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4989 add_libcall (libcall_htab,
4990 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4991 add_libcall (libcall_htab,
4992 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4993 add_libcall (libcall_htab,
4994 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4995 add_libcall (libcall_htab,
4996 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4997 add_libcall (libcall_htab,
4998 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4999 add_libcall (libcall_htab,
5000 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5001 add_libcall (libcall_htab,
5002 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5003 add_libcall (libcall_htab,
5004 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5006 /* Values from double-precision helper functions are returned in core
5007 registers if the selected core only supports single-precision
5008 arithmetic, even if we are using the hard-float ABI. The same is
5009 true for single-precision helpers, but we will never be using the
5010 hard-float ABI on a CPU which doesn't support single-precision
5011 operations in hardware. */
5012 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5013 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5014 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5015 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5016 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5017 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5018 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5019 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5020 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5021 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5022 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5023 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5024 SFmode));
5025 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5026 DFmode));
5029 return libcall && libcall_htab->find (libcall) != NULL;
5032 static rtx
5033 arm_libcall_value_1 (machine_mode mode)
5035 if (TARGET_AAPCS_BASED)
5036 return aapcs_libcall_value (mode);
5037 else if (TARGET_IWMMXT_ABI
5038 && arm_vector_mode_supported_p (mode))
5039 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5040 else
5041 return gen_rtx_REG (mode, ARG_REGISTER (1));
5044 /* Define how to find the value returned by a library function
5045 assuming the value has mode MODE. */
5047 static rtx
5048 arm_libcall_value (machine_mode mode, const_rtx libcall)
5050 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5051 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5053 /* The following libcalls return their result in integer registers,
5054 even though they return a floating point value. */
5055 if (arm_libcall_uses_aapcs_base (libcall))
5056 return gen_rtx_REG (mode, ARG_REGISTER(1));
5060 return arm_libcall_value_1 (mode);
5063 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5065 static bool
5066 arm_function_value_regno_p (const unsigned int regno)
5068 if (regno == ARG_REGISTER (1)
5069 || (TARGET_32BIT
5070 && TARGET_AAPCS_BASED
5071 && TARGET_VFP
5072 && TARGET_HARD_FLOAT
5073 && regno == FIRST_VFP_REGNUM)
5074 || (TARGET_IWMMXT_ABI
5075 && regno == FIRST_IWMMXT_REGNUM))
5076 return true;
5078 return false;
5081 /* Determine the amount of memory needed to store the possible return
5082 registers of an untyped call. */
5083 int
5084 arm_apply_result_size (void)
5086 int size = 16;
5088 if (TARGET_32BIT)
5090 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5091 size += 32;
5092 if (TARGET_IWMMXT_ABI)
5093 size += 8;
5096 return size;
5099 /* Decide whether TYPE should be returned in memory (true)
5100 or in a register (false). FNTYPE is the type of the function making
5101 the call. */
5102 static bool
5103 arm_return_in_memory (const_tree type, const_tree fntype)
5105 HOST_WIDE_INT size;
5107 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5109 if (TARGET_AAPCS_BASED)
5111 /* Simple, non-aggregate types (ie not including vectors and
5112 complex) are always returned in a register (or registers).
5113 We don't care about which register here, so we can short-cut
5114 some of the detail. */
5115 if (!AGGREGATE_TYPE_P (type)
5116 && TREE_CODE (type) != VECTOR_TYPE
5117 && TREE_CODE (type) != COMPLEX_TYPE)
5118 return false;
5120 /* Any return value that is no larger than one word can be
5121 returned in r0. */
5122 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5123 return false;
5125 /* Check any available co-processors to see if they accept the
5126 type as a register candidate (VFP, for example, can return
5127 some aggregates in consecutive registers). These aren't
5128 available if the call is variadic. */
5129 if (aapcs_select_return_coproc (type, fntype) >= 0)
5130 return false;
5132 /* Vector values should be returned using ARM registers, not
5133 memory (unless they're over 16 bytes, which will break since
5134 we only have four call-clobbered registers to play with). */
5135 if (TREE_CODE (type) == VECTOR_TYPE)
5136 return (size < 0 || size > (4 * UNITS_PER_WORD));
5138 /* The rest go in memory. */
5139 return true;
5142 if (TREE_CODE (type) == VECTOR_TYPE)
5143 return (size < 0 || size > (4 * UNITS_PER_WORD));
5145 if (!AGGREGATE_TYPE_P (type) &&
5146 (TREE_CODE (type) != VECTOR_TYPE))
5147 /* All simple types are returned in registers. */
5148 return false;
5150 if (arm_abi != ARM_ABI_APCS)
5152 /* ATPCS and later return aggregate types in memory only if they are
5153 larger than a word (or are variable size). */
5154 return (size < 0 || size > UNITS_PER_WORD);
5157 /* For the arm-wince targets we choose to be compatible with Microsoft's
5158 ARM and Thumb compilers, which always return aggregates in memory. */
5159 #ifndef ARM_WINCE
5160 /* All structures/unions bigger than one word are returned in memory.
5161 Also catch the case where int_size_in_bytes returns -1. In this case
5162 the aggregate is either huge or of variable size, and in either case
5163 we will want to return it via memory and not in a register. */
5164 if (size < 0 || size > UNITS_PER_WORD)
5165 return true;
5167 if (TREE_CODE (type) == RECORD_TYPE)
5169 tree field;
5171 /* For a struct the APCS says that we only return in a register
5172 if the type is 'integer like' and every addressable element
5173 has an offset of zero. For practical purposes this means
5174 that the structure can have at most one non bit-field element
5175 and that this element must be the first one in the structure. */
5177 /* Find the first field, ignoring non FIELD_DECL things which will
5178 have been created by C++. */
5179 for (field = TYPE_FIELDS (type);
5180 field && TREE_CODE (field) != FIELD_DECL;
5181 field = DECL_CHAIN (field))
5182 continue;
5184 if (field == NULL)
5185 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5187 /* Check that the first field is valid for returning in a register. */
5189 /* ... Floats are not allowed */
5190 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5191 return true;
5193 /* ... Aggregates that are not themselves valid for returning in
5194 a register are not allowed. */
5195 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5196 return true;
5198 /* Now check the remaining fields, if any. Only bitfields are allowed,
5199 since they are not addressable. */
5200 for (field = DECL_CHAIN (field);
5201 field;
5202 field = DECL_CHAIN (field))
5204 if (TREE_CODE (field) != FIELD_DECL)
5205 continue;
5207 if (!DECL_BIT_FIELD_TYPE (field))
5208 return true;
5211 return false;
5214 if (TREE_CODE (type) == UNION_TYPE)
5216 tree field;
5218 /* Unions can be returned in registers if every element is
5219 integral, or can be returned in an integer register. */
5220 for (field = TYPE_FIELDS (type);
5221 field;
5222 field = DECL_CHAIN (field))
5224 if (TREE_CODE (field) != FIELD_DECL)
5225 continue;
5227 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5228 return true;
5230 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5231 return true;
5234 return false;
5236 #endif /* not ARM_WINCE */
5238 /* Return all other types in memory. */
5239 return true;
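/* Illustrative outcomes under the AAPCS rules above (types are
   hypothetical):

     int                      -> false (returned in r0).
     struct { int a; }        -> false (4 bytes, fits in one word).
     struct { int a, b; }     -> true  (8 bytes and no co-processor accepts
                                        it, so it is returned in memory).
     struct { float a, b; }   -> false under the VFP hard-float PCS, since
                                 aapcs_select_return_coproc accepts it as a
                                 homogeneous FP aggregate in s0/s1.  */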
5242 const struct pcs_attribute_arg
5244 const char *arg;
5245 enum arm_pcs value;
5246 } pcs_attribute_args[] =
5248 {"aapcs", ARM_PCS_AAPCS},
5249 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5250 #if 0
5251 /* We could recognize these, but changes would be needed elsewhere
5252 * to implement them. */
5253 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5254 {"atpcs", ARM_PCS_ATPCS},
5255 {"apcs", ARM_PCS_APCS},
5256 #endif
5257 {NULL, ARM_PCS_UNKNOWN}
5260 static enum arm_pcs
5261 arm_pcs_from_attribute (tree attr)
5263 const struct pcs_attribute_arg *ptr;
5264 const char *arg;
5266 /* Get the value of the argument. */
5267 if (TREE_VALUE (attr) == NULL_TREE
5268 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5269 return ARM_PCS_UNKNOWN;
5271 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5273 /* Check it against the list of known arguments. */
5274 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5275 if (streq (arg, ptr->arg))
5276 return ptr->value;
5278 /* An unrecognized PCS variant. */
5279 return ARM_PCS_UNKNOWN;
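/* Illustrative usage (the declaration is hypothetical): the table above
   corresponds to the "pcs" function attribute, e.g.

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   which requests the VFP calling-convention variant for this signature
   even when the translation unit's default is the base AAPCS.  */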
5282 /* Get the PCS variant to use for this call. TYPE is the function's type
5283 specification, DECL is the specific declaration. DECL may be null if
5284 the call could be indirect or if this is a library call. */
5285 static enum arm_pcs
5286 arm_get_pcs_model (const_tree type, const_tree decl)
5288 bool user_convention = false;
5289 enum arm_pcs user_pcs = arm_pcs_default;
5290 tree attr;
5292 gcc_assert (type);
5294 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5295 if (attr)
5297 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5298 user_convention = true;
5301 if (TARGET_AAPCS_BASED)
5303 /* Detect varargs functions. These always use the base rules
5304 (no argument is ever a candidate for a co-processor
5305 register). */
5306 bool base_rules = stdarg_p (type);
5308 if (user_convention)
5310 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5311 sorry ("non-AAPCS derived PCS variant");
5312 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5313 error ("variadic functions must use the base AAPCS variant");
5316 if (base_rules)
5317 return ARM_PCS_AAPCS;
5318 else if (user_convention)
5319 return user_pcs;
5320 else if (decl && flag_unit_at_a_time)
5322 /* Local functions never leak outside this compilation unit,
5323 so we are free to use whatever conventions are
5324 appropriate. */
5325 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5326 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5327 if (i && i->local)
5328 return ARM_PCS_AAPCS_LOCAL;
5331 else if (user_convention && user_pcs != arm_pcs_default)
5332 sorry ("PCS variant");
5334 /* For everything else we use the target's default. */
5335 return arm_pcs_default;
5339 static void
5340 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5341 const_tree fntype ATTRIBUTE_UNUSED,
5342 rtx libcall ATTRIBUTE_UNUSED,
5343 const_tree fndecl ATTRIBUTE_UNUSED)
5345 /* Record the unallocated VFP registers. */
5346 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5347 pcum->aapcs_vfp_reg_alloc = 0;
5350 /* Walk down the type tree of TYPE counting consecutive base elements.
5351 If *MODEP is VOIDmode, then set it to the first valid floating point
5352 type. If a non-floating point type is found, or if a floating point
5353 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5354 otherwise return the count in the sub-tree. */
5355 static int
5356 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5358 machine_mode mode;
5359 HOST_WIDE_INT size;
5361 switch (TREE_CODE (type))
5363 case REAL_TYPE:
5364 mode = TYPE_MODE (type);
5365 if (mode != DFmode && mode != SFmode)
5366 return -1;
5368 if (*modep == VOIDmode)
5369 *modep = mode;
5371 if (*modep == mode)
5372 return 1;
5374 break;
5376 case COMPLEX_TYPE:
5377 mode = TYPE_MODE (TREE_TYPE (type));
5378 if (mode != DFmode && mode != SFmode)
5379 return -1;
5381 if (*modep == VOIDmode)
5382 *modep = mode;
5384 if (*modep == mode)
5385 return 2;
5387 break;
5389 case VECTOR_TYPE:
5390 /* Use V2SImode and V4SImode as representatives of all 64-bit
5391 and 128-bit vector types, whether or not those modes are
5392 supported with the present options. */
5393 size = int_size_in_bytes (type);
5394 switch (size)
5396 case 8:
5397 mode = V2SImode;
5398 break;
5399 case 16:
5400 mode = V4SImode;
5401 break;
5402 default:
5403 return -1;
5406 if (*modep == VOIDmode)
5407 *modep = mode;
5409 /* Vector modes are considered to be opaque: two vectors are
5410 equivalent for the purposes of being homogeneous aggregates
5411 if they are the same size. */
5412 if (*modep == mode)
5413 return 1;
5415 break;
5417 case ARRAY_TYPE:
5419 int count;
5420 tree index = TYPE_DOMAIN (type);
5422 /* Can't handle incomplete types nor sizes that are not
5423 fixed. */
5424 if (!COMPLETE_TYPE_P (type)
5425 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5426 return -1;
5428 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5429 if (count == -1
5430 || !index
5431 || !TYPE_MAX_VALUE (index)
5432 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5433 || !TYPE_MIN_VALUE (index)
5434 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5435 || count < 0)
5436 return -1;
5438 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5439 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5441 /* There must be no padding. */
5442 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5443 return -1;
5445 return count;
5448 case RECORD_TYPE:
5450 int count = 0;
5451 int sub_count;
5452 tree field;
5454 /* Can't handle incomplete types or sizes that are not
5455 fixed. */
5456 if (!COMPLETE_TYPE_P (type)
5457 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5458 return -1;
5460 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5462 if (TREE_CODE (field) != FIELD_DECL)
5463 continue;
5465 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5466 if (sub_count < 0)
5467 return -1;
5468 count += sub_count;
5471 /* There must be no padding. */
5472 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5473 return -1;
5475 return count;
5478 case UNION_TYPE:
5479 case QUAL_UNION_TYPE:
5481 /* These aren't very interesting except in a degenerate case. */
5482 int count = 0;
5483 int sub_count;
5484 tree field;
5486 /* Can't handle incomplete types or sizes that are not
5487 fixed. */
5488 if (!COMPLETE_TYPE_P (type)
5489 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5490 return -1;
5492 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5494 if (TREE_CODE (field) != FIELD_DECL)
5495 continue;
5497 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5498 if (sub_count < 0)
5499 return -1;
5500 count = count > sub_count ? count : sub_count;
5503 /* There must be no padding. */
5504 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5505 return -1;
5507 return count;
5510 default:
5511 break;
5514 return -1;
5517 /* Return true if PCS_VARIANT should use VFP registers. */
5518 static bool
5519 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5521 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5523 static bool seen_thumb1_vfp = false;
5525 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5527 sorry ("Thumb-1 hard-float VFP ABI");
5528 /* sorry() is not immediately fatal, so only display this once. */
5529 seen_thumb1_vfp = true;
5532 return true;
5535 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5536 return false;
5538 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5539 (TARGET_VFP_DOUBLE || !is_double));
5542 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5543 suitable for passing or returning in VFP registers for the PCS
5544 variant selected. If it is, then *BASE_MODE is updated to contain
5545 a machine mode describing each element of the argument's type and
5546 *COUNT to hold the number of such elements. */
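/* For illustration, with the VFP PCS in effect:
     struct hfa { double d[2]; }   -> *base_mode = DFmode, *count = 2, true
     struct big { float f[5]; }    -> more than 4 elements, returns false
     DCmode (complex double)       -> *base_mode = DFmode, *count = 2, true  */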
5547 static bool
5548 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5549 machine_mode mode, const_tree type,
5550 machine_mode *base_mode, int *count)
5552 machine_mode new_mode = VOIDmode;
5554 /* If we have the type information, prefer that to working things
5555 out from the mode. */
5556 if (type)
5558 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5560 if (ag_count > 0 && ag_count <= 4)
5561 *count = ag_count;
5562 else
5563 return false;
5565 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5566 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5567 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5569 *count = 1;
5570 new_mode = mode;
5572 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5574 *count = 2;
5575 new_mode = (mode == DCmode ? DFmode : SFmode);
5577 else
5578 return false;
5581 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5582 return false;
5584 *base_mode = new_mode;
5585 return true;
5588 static bool
5589 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5590 machine_mode mode, const_tree type)
5592 int count ATTRIBUTE_UNUSED;
5593 machine_mode ag_mode ATTRIBUTE_UNUSED;
5595 if (!use_vfp_abi (pcs_variant, false))
5596 return false;
5597 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5598 &ag_mode, &count);
5601 static bool
5602 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5603 const_tree type)
5605 if (!use_vfp_abi (pcum->pcs_variant, false))
5606 return false;
5608 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5609 &pcum->aapcs_vfp_rmode,
5610 &pcum->aapcs_vfp_rcount);
5613 static bool
5614 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5615 const_tree type ATTRIBUTE_UNUSED)
5617 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5618 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5619 int regno;
5621 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5622 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5624 pcum->aapcs_vfp_reg_alloc = mask << regno;
5625 if (mode == BLKmode
5626 || (mode == TImode && ! TARGET_NEON)
5627 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5629 int i;
5630 int rcount = pcum->aapcs_vfp_rcount;
5631 int rshift = shift;
5632 machine_mode rmode = pcum->aapcs_vfp_rmode;
5633 rtx par;
5634 if (!TARGET_NEON)
5636 /* Avoid using unsupported vector modes. */
5637 if (rmode == V2SImode)
5638 rmode = DImode;
5639 else if (rmode == V4SImode)
5641 rmode = DImode;
5642 rcount *= 2;
5643 rshift /= 2;
5646 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5647 for (i = 0; i < rcount; i++)
5649 rtx tmp = gen_rtx_REG (rmode,
5650 FIRST_VFP_REGNUM + regno + i * rshift);
5651 tmp = gen_rtx_EXPR_LIST
5652 (VOIDmode, tmp,
5653 GEN_INT (i * GET_MODE_SIZE (rmode)));
5654 XVECEXP (par, 0, i) = tmp;
5657 pcum->aapcs_reg = par;
5659 else
5660 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5661 return true;
5663 return false;
5666 static rtx
5667 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5668 machine_mode mode,
5669 const_tree type ATTRIBUTE_UNUSED)
5671 if (!use_vfp_abi (pcs_variant, false))
5672 return NULL;
5674 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5676 int count;
5677 machine_mode ag_mode;
5678 int i;
5679 rtx par;
5680 int shift;
5682 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5683 &ag_mode, &count);
5685 if (!TARGET_NEON)
5687 if (ag_mode == V2SImode)
5688 ag_mode = DImode;
5689 else if (ag_mode == V4SImode)
5691 ag_mode = DImode;
5692 count *= 2;
5695 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5696 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5697 for (i = 0; i < count; i++)
5699 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5700 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5701 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5702 XVECEXP (par, 0, i) = tmp;
5705 return par;
5708 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5711 static void
5712 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5713 machine_mode mode ATTRIBUTE_UNUSED,
5714 const_tree type ATTRIBUTE_UNUSED)
5716 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5717 pcum->aapcs_vfp_reg_alloc = 0;
5718 return;
5721 #define AAPCS_CP(X) \
5723 aapcs_ ## X ## _cum_init, \
5724 aapcs_ ## X ## _is_call_candidate, \
5725 aapcs_ ## X ## _allocate, \
5726 aapcs_ ## X ## _is_return_candidate, \
5727 aapcs_ ## X ## _allocate_return_reg, \
5728 aapcs_ ## X ## _advance \
5731 /* Table of co-processors that can be used to pass arguments in
5732 registers. Ideally no argument should be a candidate for more than
5733 one co-processor table entry, but the table is processed in order
5734 and stops after the first match. If that entry then fails to put
5735 the argument into a co-processor register, the argument will go on
5736 the stack. */
5737 static struct
5739 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5740 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5742 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5743 BLKmode) is a candidate for this co-processor's registers; this
5744 function should ignore any position-dependent state in
5745 CUMULATIVE_ARGS and only use call-type dependent information. */
5746 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5748 /* Return true if the argument does get a co-processor register; it
5749 should set aapcs_reg to an RTX of the register allocated as is
5750 required for a return from FUNCTION_ARG. */
5751 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5753 /* Return true if a result of mode MODE (or type TYPE if MODE is
5754 BLKmode) can be returned in this co-processor's registers. */
5755 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5757 /* Allocate and return an RTX element to hold the return type of a
5758 call; this routine must not fail and will only be called if
5759 is_return_candidate returned true with the same parameters. */
5760 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5762 /* Finish processing this argument and prepare to start processing
5763 the next one. */
5764 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5765 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5767 AAPCS_CP(vfp)
5770 #undef AAPCS_CP
5772 static int
5773 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5774 const_tree type)
5776 int i;
5778 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5779 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5780 return i;
5782 return -1;
5785 static int
5786 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5788 /* We aren't passed a decl, so we can't check that a call is local.
5789 However, it isn't clear that that would be a win anyway, since it
5790 might limit some tail-calling opportunities. */
5791 enum arm_pcs pcs_variant;
5793 if (fntype)
5795 const_tree fndecl = NULL_TREE;
5797 if (TREE_CODE (fntype) == FUNCTION_DECL)
5799 fndecl = fntype;
5800 fntype = TREE_TYPE (fntype);
5803 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5805 else
5806 pcs_variant = arm_pcs_default;
5808 if (pcs_variant != ARM_PCS_AAPCS)
5810 int i;
5812 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5813 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5814 TYPE_MODE (type),
5815 type))
5816 return i;
5818 return -1;
5821 static rtx
5822 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5823 const_tree fntype)
5825 /* We aren't passed a decl, so we can't check that a call is local.
5826 However, it isn't clear that that would be a win anyway, since it
5827 might limit some tail-calling opportunities. */
5828 enum arm_pcs pcs_variant;
5829 int unsignedp ATTRIBUTE_UNUSED;
5831 if (fntype)
5833 const_tree fndecl = NULL_TREE;
5835 if (TREE_CODE (fntype) == FUNCTION_DECL)
5837 fndecl = fntype;
5838 fntype = TREE_TYPE (fntype);
5841 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5843 else
5844 pcs_variant = arm_pcs_default;
5846 /* Promote integer types. */
5847 if (type && INTEGRAL_TYPE_P (type))
5848 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5850 if (pcs_variant != ARM_PCS_AAPCS)
5852 int i;
5854 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5855 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5856 type))
5857 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5858 mode, type);
5861 /* Promotes small structs returned in a register to full-word size
5862 for big-endian AAPCS. */
5863 if (type && arm_return_in_msb (type))
5865 HOST_WIDE_INT size = int_size_in_bytes (type);
5866 if (size % UNITS_PER_WORD != 0)
5868 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5869 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5873 return gen_rtx_REG (mode, R0_REGNUM);
5876 static rtx
5877 aapcs_libcall_value (machine_mode mode)
5879 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5880 && GET_MODE_SIZE (mode) <= 4)
5881 mode = SImode;
5883 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5886 /* Lay out a function argument using the AAPCS rules. The rule
5887 numbers referred to here are those in the AAPCS. */
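/* Worked example (illustrative): with NCRN = 2 and nothing yet on the
   stack, a 24-byte structure needs 6 words; rule C5 below splits it,
   placing the first 8 bytes in r2/r3 (aapcs_partial = 8) and the
   remaining 16 bytes on the stack, after which NCRN = 4.  */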
5888 static void
5889 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5890 const_tree type, bool named)
5892 int nregs, nregs2;
5893 int ncrn;
5895 /* We only need to do this once per argument. */
5896 if (pcum->aapcs_arg_processed)
5897 return;
5899 pcum->aapcs_arg_processed = true;
5901 /* Special case: if named is false then we are handling an incoming
5902 anonymous argument which is on the stack. */
5903 if (!named)
5904 return;
5906 /* Is this a potential co-processor register candidate? */
5907 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5909 int slot = aapcs_select_call_coproc (pcum, mode, type);
5910 pcum->aapcs_cprc_slot = slot;
5912 /* We don't have to apply any of the rules from part B of the
5913 preparation phase; these are handled elsewhere in the
5914 compiler. */
5916 if (slot >= 0)
5918 /* A Co-processor register candidate goes either in its own
5919 class of registers or on the stack. */
5920 if (!pcum->aapcs_cprc_failed[slot])
5922 /* C1.cp - Try to allocate the argument to co-processor
5923 registers. */
5924 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5925 return;
5927 /* C2.cp - Put the argument on the stack and note that we
5928 can't assign any more candidates in this slot. We also
5929 need to note that we have allocated stack space, so that
5930 we won't later try to split a non-cprc candidate between
5931 core registers and the stack. */
5932 pcum->aapcs_cprc_failed[slot] = true;
5933 pcum->can_split = false;
5936 /* We didn't get a register, so this argument goes on the
5937 stack. */
5938 gcc_assert (pcum->can_split == false);
5939 return;
5943 /* C3 - For double-word aligned arguments, round the NCRN up to the
5944 next even number. */
5945 ncrn = pcum->aapcs_ncrn;
5946 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5947 ncrn++;
5949 nregs = ARM_NUM_REGS2(mode, type);
5951 /* Sigh, this test should really assert that nregs > 0, but a GCC
5952 extension allows empty structs and then gives them zero size; it
5953 then allows such a structure to be passed by value. For some of
5954 the code below we have to pretend that such an argument has
5955 non-zero size so that we 'locate' it correctly either in
5956 registers or on the stack. */
5957 gcc_assert (nregs >= 0);
5959 nregs2 = nregs ? nregs : 1;
5961 /* C4 - Argument fits entirely in core registers. */
5962 if (ncrn + nregs2 <= NUM_ARG_REGS)
5964 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5965 pcum->aapcs_next_ncrn = ncrn + nregs;
5966 return;
5969 /* C5 - Some core registers left and there are no arguments already
5970 on the stack: split this argument between the remaining core
5971 registers and the stack. */
5972 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5974 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5975 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5976 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5977 return;
5980 /* C6 - NCRN is set to 4. */
5981 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5983 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5984 return;
5987 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5988 for a call to a function whose data type is FNTYPE.
5989 For a library call, FNTYPE is NULL. */
5990 void
5991 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5992 rtx libname,
5993 tree fndecl ATTRIBUTE_UNUSED)
5995 /* Long call handling. */
5996 if (fntype)
5997 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5998 else
5999 pcum->pcs_variant = arm_pcs_default;
6001 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6003 if (arm_libcall_uses_aapcs_base (libname))
6004 pcum->pcs_variant = ARM_PCS_AAPCS;
6006 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6007 pcum->aapcs_reg = NULL_RTX;
6008 pcum->aapcs_partial = 0;
6009 pcum->aapcs_arg_processed = false;
6010 pcum->aapcs_cprc_slot = -1;
6011 pcum->can_split = true;
6013 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6015 int i;
6017 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6019 pcum->aapcs_cprc_failed[i] = false;
6020 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6023 return;
6026 /* Legacy ABIs */
6028 /* On the ARM, the offset starts at 0. */
6029 pcum->nregs = 0;
6030 pcum->iwmmxt_nregs = 0;
6031 pcum->can_split = true;
6033 /* Varargs vectors are treated the same as long long.
6034 named_count avoids having to change the way arm handles 'named'. */
6035 pcum->named_count = 0;
6036 pcum->nargs = 0;
6038 if (TARGET_REALLY_IWMMXT && fntype)
6040 tree fn_arg;
6042 for (fn_arg = TYPE_ARG_TYPES (fntype);
6043 fn_arg;
6044 fn_arg = TREE_CHAIN (fn_arg))
6045 pcum->named_count += 1;
6047 if (! pcum->named_count)
6048 pcum->named_count = INT_MAX;
6052 /* Return true if mode/type need doubleword alignment. */
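/* For example, DImode and DFmode have 64-bit alignment, which exceeds
   PARM_BOUNDARY (32 bits here), so they need doubleword alignment;
   SImode does not.  A type carrying __attribute__ ((aligned (8))) is
   also doubleword aligned.  */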
6053 static bool
6054 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6056 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6057 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6061 /* Determine where to put an argument to a function.
6062 Value is zero to push the argument on the stack,
6063 or a hard register in which to store the argument.
6065 MODE is the argument's machine mode.
6066 TYPE is the data type of the argument (as a tree).
6067 This is null for libcalls where that information may
6068 not be available.
6069 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6070 the preceding args and about the function being called.
6071 NAMED is nonzero if this argument is a named parameter
6072 (otherwise it is an extra parameter matching an ellipsis).
6074 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6075 other arguments are passed on the stack. If (NAMED == 0) (which happens
6076 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6077 defined), say it is passed on the stack (function_prologue will
6078 indeed make it go on the stack if necessary). */
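/* Illustrative AAPCS example: for f (int a, long long b, int c),
   a is placed in r0, b is 8-byte aligned so r1 is skipped and b
   occupies r2/r3, and c goes on the stack.  */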
6080 static rtx
6081 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6082 const_tree type, bool named)
6084 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6085 int nregs;
6087 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6088 a call insn (op3 of a call_value insn). */
6089 if (mode == VOIDmode)
6090 return const0_rtx;
6092 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6094 aapcs_layout_arg (pcum, mode, type, named);
6095 return pcum->aapcs_reg;
6098 /* Varargs vectors are treated the same as long long.
6099 named_count avoids having to change the way arm handles 'named'. */
6100 if (TARGET_IWMMXT_ABI
6101 && arm_vector_mode_supported_p (mode)
6102 && pcum->named_count > pcum->nargs + 1)
6104 if (pcum->iwmmxt_nregs <= 9)
6105 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6106 else
6108 pcum->can_split = false;
6109 return NULL_RTX;
6113 /* Put doubleword aligned quantities in even register pairs. */
6114 if (pcum->nregs & 1
6115 && ARM_DOUBLEWORD_ALIGN
6116 && arm_needs_doubleword_align (mode, type))
6117 pcum->nregs++;
6119 /* Only allow splitting an arg between regs and memory if all preceding
6120 args were allocated to regs. For args passed by reference we only count
6121 the reference pointer. */
6122 if (pcum->can_split)
6123 nregs = 1;
6124 else
6125 nregs = ARM_NUM_REGS2 (mode, type);
6127 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6128 return NULL_RTX;
6130 return gen_rtx_REG (mode, pcum->nregs);
6133 static unsigned int
6134 arm_function_arg_boundary (machine_mode mode, const_tree type)
6136 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6137 ? DOUBLEWORD_ALIGNMENT
6138 : PARM_BOUNDARY);
6141 static int
6142 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6143 tree type, bool named)
6145 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6146 int nregs = pcum->nregs;
6148 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6150 aapcs_layout_arg (pcum, mode, type, named);
6151 return pcum->aapcs_partial;
6154 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6155 return 0;
6157 if (NUM_ARG_REGS > nregs
6158 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6159 && pcum->can_split)
6160 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6162 return 0;
6165 /* Update the data in PCUM to advance over an argument
6166 of mode MODE and data type TYPE.
6167 (TYPE is null for libcalls where that information may not be available.) */
6169 static void
6170 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6171 const_tree type, bool named)
6173 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6175 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6177 aapcs_layout_arg (pcum, mode, type, named);
6179 if (pcum->aapcs_cprc_slot >= 0)
6181 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6182 type);
6183 pcum->aapcs_cprc_slot = -1;
6186 /* Generic stuff. */
6187 pcum->aapcs_arg_processed = false;
6188 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6189 pcum->aapcs_reg = NULL_RTX;
6190 pcum->aapcs_partial = 0;
6192 else
6194 pcum->nargs += 1;
6195 if (arm_vector_mode_supported_p (mode)
6196 && pcum->named_count > pcum->nargs
6197 && TARGET_IWMMXT_ABI)
6198 pcum->iwmmxt_nregs += 1;
6199 else
6200 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6204 /* Variable sized types are passed by reference. This is a GCC
6205 extension to the ARM ABI. */
6207 static bool
6208 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6209 machine_mode mode ATTRIBUTE_UNUSED,
6210 const_tree type, bool named ATTRIBUTE_UNUSED)
6212 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6215 /* Encode the current state of the #pragma [no_]long_calls. */
6216 typedef enum
6218 OFF, /* No #pragma [no_]long_calls is in effect. */
6219 LONG, /* #pragma long_calls is in effect. */
6220 SHORT /* #pragma no_long_calls is in effect. */
6221 } arm_pragma_enum;
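/* Typical usage (illustrative):

     #pragma long_calls
     void far_away (void);      -- subsequent calls use a full 32-bit address
     #pragma long_calls_off

   See arm_pr_long_calls and friends below.  */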
6223 static arm_pragma_enum arm_pragma_long_calls = OFF;
6225 void
6226 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6228 arm_pragma_long_calls = LONG;
6231 void
6232 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6234 arm_pragma_long_calls = SHORT;
6237 void
6238 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6240 arm_pragma_long_calls = OFF;
6243 /* Handle an attribute requiring a FUNCTION_DECL;
6244 arguments as in struct attribute_spec.handler. */
6245 static tree
6246 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6247 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6249 if (TREE_CODE (*node) != FUNCTION_DECL)
6251 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6252 name);
6253 *no_add_attrs = true;
6256 return NULL_TREE;
6259 /* Handle an "interrupt" or "isr" attribute;
6260 arguments as in struct attribute_spec.handler. */
6261 static tree
6262 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6263 bool *no_add_attrs)
6265 if (DECL_P (*node))
6267 if (TREE_CODE (*node) != FUNCTION_DECL)
6269 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6270 name);
6271 *no_add_attrs = true;
6273 /* FIXME: the argument, if any, is checked for type attributes;
6274 should it be checked for decl ones? */
6276 else
6278 if (TREE_CODE (*node) == FUNCTION_TYPE
6279 || TREE_CODE (*node) == METHOD_TYPE)
6281 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6283 warning (OPT_Wattributes, "%qE attribute ignored",
6284 name);
6285 *no_add_attrs = true;
6288 else if (TREE_CODE (*node) == POINTER_TYPE
6289 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6290 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6291 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6293 *node = build_variant_type_copy (*node);
6294 TREE_TYPE (*node) = build_type_attribute_variant
6295 (TREE_TYPE (*node),
6296 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6297 *no_add_attrs = true;
6299 else
6301 /* Possibly pass this attribute on from the type to a decl. */
6302 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6303 | (int) ATTR_FLAG_FUNCTION_NEXT
6304 | (int) ATTR_FLAG_ARRAY_NEXT))
6306 *no_add_attrs = true;
6307 return tree_cons (name, args, NULL_TREE);
6309 else
6311 warning (OPT_Wattributes, "%qE attribute ignored",
6312 name);
6317 return NULL_TREE;
6320 /* Handle a "pcs" attribute; arguments as in struct
6321 attribute_spec.handler. */
6322 static tree
6323 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6324 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6326 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6328 warning (OPT_Wattributes, "%qE attribute ignored", name);
6329 *no_add_attrs = true;
6331 return NULL_TREE;
6334 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6335 /* Handle the "notshared" attribute. This attribute is another way of
6336 requesting hidden visibility. ARM's compiler supports
6337 "__declspec(notshared)"; we support the same thing via an
6338 attribute. */
6340 static tree
6341 arm_handle_notshared_attribute (tree *node,
6342 tree name ATTRIBUTE_UNUSED,
6343 tree args ATTRIBUTE_UNUSED,
6344 int flags ATTRIBUTE_UNUSED,
6345 bool *no_add_attrs)
6347 tree decl = TYPE_NAME (*node);
6349 if (decl)
6351 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6352 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6353 *no_add_attrs = false;
6355 return NULL_TREE;
6357 #endif
6359 /* Return 0 if the attributes for two types are incompatible, 1 if they
6360 are compatible, and 2 if they are nearly compatible (which causes a
6361 warning to be generated). */
6362 static int
6363 arm_comp_type_attributes (const_tree type1, const_tree type2)
6365 int l1, l2, s1, s2;
6367 /* Check for mismatch of non-default calling convention. */
6368 if (TREE_CODE (type1) != FUNCTION_TYPE)
6369 return 1;
6371 /* Check for mismatched call attributes. */
6372 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6373 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6374 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6375 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6377 /* Only bother to check if an attribute is defined. */
6378 if (l1 | l2 | s1 | s2)
6380 /* If one type has an attribute, the other must have the same attribute. */
6381 if ((l1 != l2) || (s1 != s2))
6382 return 0;
6384 /* Disallow mixed attributes. */
6385 if ((l1 & s2) || (l2 & s1))
6386 return 0;
6389 /* Check for mismatched ISR attribute. */
6390 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6391 if (! l1)
6392 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6393 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6394 if (! l2)
6395 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6396 if (l1 != l2)
6397 return 0;
6399 return 1;
6402 /* Assigns default attributes to newly defined type. This is used to
6403 set short_call/long_call attributes for function types of
6404 functions defined inside corresponding #pragma scopes. */
6405 static void
6406 arm_set_default_type_attributes (tree type)
6408 /* Add __attribute__ ((long_call)) to all functions, when
6409 inside #pragma long_calls or __attribute__ ((short_call)),
6410 when inside #pragma no_long_calls. */
6411 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6413 tree type_attr_list, attr_name;
6414 type_attr_list = TYPE_ATTRIBUTES (type);
6416 if (arm_pragma_long_calls == LONG)
6417 attr_name = get_identifier ("long_call");
6418 else if (arm_pragma_long_calls == SHORT)
6419 attr_name = get_identifier ("short_call");
6420 else
6421 return;
6423 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6424 TYPE_ATTRIBUTES (type) = type_attr_list;
6428 /* Return true if DECL is known to be linked into section SECTION. */
6430 static bool
6431 arm_function_in_section_p (tree decl, section *section)
6433 /* We can only be certain about the prevailing symbol definition. */
6434 if (!decl_binds_to_current_def_p (decl))
6435 return false;
6437 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6438 if (!DECL_SECTION_NAME (decl))
6440 /* Make sure that we will not create a unique section for DECL. */
6441 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6442 return false;
6445 return function_section (decl) == section;
6448 /* Return nonzero if a 32-bit "long_call" should be generated for
6449 a call from the current function to DECL. We generate a long_call
6450 if the function:
6452 a. has an __attribute__ ((long_call))
6453 or b. is within the scope of a #pragma long_calls
6454 or c. the -mlong-calls command line switch has been specified
6456 However we do not generate a long call if the function:
6458 d. has an __attribute__ ((short_call))
6459 or e. is inside the scope of a #pragma no_long_calls
6460 or f. is defined in the same section as the current function. */
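/* For example (illustrative), either of the following forces a long call
   to far_func from code compiled without -mlong-calls:

     extern void far_func (void) __attribute__ ((long_call));

     #pragma long_calls
     extern void far_func (void);
     #pragma long_calls_off
*/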
6462 bool
6463 arm_is_long_call_p (tree decl)
6465 tree attrs;
6467 if (!decl)
6468 return TARGET_LONG_CALLS;
6470 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6471 if (lookup_attribute ("short_call", attrs))
6472 return false;
6474 /* For "f", be conservative, and only cater for cases in which the
6475 whole of the current function is placed in the same section. */
6476 if (!flag_reorder_blocks_and_partition
6477 && TREE_CODE (decl) == FUNCTION_DECL
6478 && arm_function_in_section_p (decl, current_function_section ()))
6479 return false;
6481 if (lookup_attribute ("long_call", attrs))
6482 return true;
6484 return TARGET_LONG_CALLS;
6487 /* Return nonzero if it is ok to make a tail-call to DECL. */
6488 static bool
6489 arm_function_ok_for_sibcall (tree decl, tree exp)
6491 unsigned long func_type;
6493 if (cfun->machine->sibcall_blocked)
6494 return false;
6496 /* Never tailcall something if we are generating code for Thumb-1. */
6497 if (TARGET_THUMB1)
6498 return false;
6500 /* The PIC register is live on entry to VxWorks PLT entries, so we
6501 must make the call before restoring the PIC register. */
6502 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6503 return false;
6505 /* If we are interworking and the function is not declared static
6506 then we can't tail-call it unless we know that it exists in this
6507 compilation unit (since it might be a Thumb routine). */
6508 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6509 && !TREE_ASM_WRITTEN (decl))
6510 return false;
6512 func_type = arm_current_func_type ();
6513 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6514 if (IS_INTERRUPT (func_type))
6515 return false;
6517 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6519 /* Check that the return value locations are the same. For
6520 example that we aren't returning a value from the sibling in
6521 a VFP register but then need to transfer it to a core
6522 register. */
6523 rtx a, b;
6525 a = arm_function_value (TREE_TYPE (exp), decl, false);
6526 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6527 cfun->decl, false);
6528 if (!rtx_equal_p (a, b))
6529 return false;
6532 /* Never tailcall if function may be called with a misaligned SP. */
6533 if (IS_STACKALIGN (func_type))
6534 return false;
6536 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6537 references should become a NOP. Don't convert such calls into
6538 sibling calls. */
6539 if (TARGET_AAPCS_BASED
6540 && arm_abi == ARM_ABI_AAPCS
6541 && decl
6542 && DECL_WEAK (decl))
6543 return false;
6545 /* Everything else is ok. */
6546 return true;
6550 /* Addressing mode support functions. */
6552 /* Return nonzero if X is a legitimate immediate operand when compiling
6553 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
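/* For example, (const_int 42) and (label_ref N) are legitimate here,
   while (symbol_ref "foo") or (const (plus (symbol_ref "foo")
   (const_int 4))) are not, since they would need a GOT load.  */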
6555 legitimate_pic_operand_p (rtx x)
6557 if (GET_CODE (x) == SYMBOL_REF
6558 || (GET_CODE (x) == CONST
6559 && GET_CODE (XEXP (x, 0)) == PLUS
6560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6561 return 0;
6563 return 1;
6566 /* Record that the current function needs a PIC register. Initialize
6567 cfun->machine->pic_reg if we have not already done so. */
6569 static void
6570 require_pic_register (void)
6572 /* A lot of the logic here is made obscure by the fact that this
6573 routine gets called as part of the rtx cost estimation process.
6574 We don't want those calls to affect any assumptions about the real
6575 function; and further, we can't call entry_of_function() until we
6576 start the real expansion process. */
6577 if (!crtl->uses_pic_offset_table)
6579 gcc_assert (can_create_pseudo_p ());
6580 if (arm_pic_register != INVALID_REGNUM
6581 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6583 if (!cfun->machine->pic_reg)
6584 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6586 /* Play games to avoid marking the function as needing pic
6587 if we are being called as part of the cost-estimation
6588 process. */
6589 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6590 crtl->uses_pic_offset_table = 1;
6592 else
6594 rtx_insn *seq, *insn;
6596 if (!cfun->machine->pic_reg)
6597 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6599 /* Play games to avoid marking the function as needing pic
6600 if we are being called as part of the cost-estimation
6601 process. */
6602 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6604 crtl->uses_pic_offset_table = 1;
6605 start_sequence ();
6607 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6608 && arm_pic_register > LAST_LO_REGNUM)
6609 emit_move_insn (cfun->machine->pic_reg,
6610 gen_rtx_REG (Pmode, arm_pic_register));
6611 else
6612 arm_load_pic_register (0UL);
6614 seq = get_insns ();
6615 end_sequence ();
6617 for (insn = seq; insn; insn = NEXT_INSN (insn))
6618 if (INSN_P (insn))
6619 INSN_LOCATION (insn) = prologue_location;
6621 /* We can be called during expansion of PHI nodes, where
6622 we can't yet emit instructions directly in the final
6623 insn stream. Queue the insns on the entry edge, they will
6624 be committed after everything else is expanded. */
6625 insert_insn_on_edge (seq,
6626 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6633 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6635 if (GET_CODE (orig) == SYMBOL_REF
6636 || GET_CODE (orig) == LABEL_REF)
6638 rtx insn;
6640 if (reg == 0)
6642 gcc_assert (can_create_pseudo_p ());
6643 reg = gen_reg_rtx (Pmode);
6646 /* VxWorks does not impose a fixed gap between segments; the run-time
6647 gap can be different from the object-file gap. We therefore can't
6648 use GOTOFF unless we are absolutely sure that the symbol is in the
6649 same segment as the GOT. Unfortunately, the flexibility of linker
6650 scripts means that we can't be sure of that in general, so assume
6651 that GOTOFF is never valid on VxWorks. */
6652 if ((GET_CODE (orig) == LABEL_REF
6653 || (GET_CODE (orig) == SYMBOL_REF &&
6654 SYMBOL_REF_LOCAL_P (orig)))
6655 && NEED_GOT_RELOC
6656 && arm_pic_data_is_text_relative)
6657 insn = arm_pic_static_addr (orig, reg);
6658 else
6660 rtx pat;
6661 rtx mem;
6663 /* If this function doesn't have a pic register, create one now. */
6664 require_pic_register ();
6666 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6668 /* Make the MEM as close to a constant as possible. */
6669 mem = SET_SRC (pat);
6670 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6671 MEM_READONLY_P (mem) = 1;
6672 MEM_NOTRAP_P (mem) = 1;
6674 insn = emit_insn (pat);
6677 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6678 by loop. */
6679 set_unique_reg_note (insn, REG_EQUAL, orig);
6681 return reg;
6683 else if (GET_CODE (orig) == CONST)
6685 rtx base, offset;
6687 if (GET_CODE (XEXP (orig, 0)) == PLUS
6688 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6689 return orig;
6691 /* Handle the case where we have: const (UNSPEC_TLS). */
6692 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6693 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6694 return orig;
6696 /* Handle the case where we have:
6697 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6698 CONST_INT. */
6699 if (GET_CODE (XEXP (orig, 0)) == PLUS
6700 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6701 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6703 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6704 return orig;
6707 if (reg == 0)
6709 gcc_assert (can_create_pseudo_p ());
6710 reg = gen_reg_rtx (Pmode);
6713 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6715 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6716 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6717 base == reg ? 0 : reg);
6719 if (CONST_INT_P (offset))
6721 /* The base register doesn't really matter, we only want to
6722 test the index for the appropriate mode. */
6723 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6725 gcc_assert (can_create_pseudo_p ());
6726 offset = force_reg (Pmode, offset);
6729 if (CONST_INT_P (offset))
6730 return plus_constant (Pmode, base, INTVAL (offset));
6733 if (GET_MODE_SIZE (mode) > 4
6734 && (GET_MODE_CLASS (mode) == MODE_INT
6735 || TARGET_SOFT_FLOAT))
6737 emit_insn (gen_addsi3 (reg, base, offset));
6738 return reg;
6741 return gen_rtx_PLUS (Pmode, base, offset);
6744 return orig;
6748 /* Find a spare register to use during the prolog of a function. */
6750 static int
6751 thumb_find_work_register (unsigned long pushed_regs_mask)
6753 int reg;
6755 /* Check the argument registers first as these are call-used. The
6756 register allocation order means that sometimes r3 might be used
6757 but earlier argument registers might not, so check them all. */
6758 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6759 if (!df_regs_ever_live_p (reg))
6760 return reg;
6762 /* Before going on to check the call-saved registers we can try a couple
6763 more ways of deducing that r3 is available. The first is when we are
6764 pushing anonymous arguments onto the stack and we have less than 4
6765 registers worth of fixed arguments(*). In this case r3 will be part of
6766 the variable argument list and so we can be sure that it will be
6767 pushed right at the start of the function. Hence it will be available
6768 for the rest of the prologue.
6769 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6770 if (cfun->machine->uses_anonymous_args
6771 && crtl->args.pretend_args_size > 0)
6772 return LAST_ARG_REGNUM;
6774 /* The other case is when we have fixed arguments but less than 4 registers
6775 worth. In this case r3 might be used in the body of the function, but
6776 it is not being used to convey an argument into the function. In theory
6777 we could just check crtl->args.size to see how many bytes are
6778 being passed in argument registers, but it seems that it is unreliable.
6779 Sometimes it will have the value 0 when in fact arguments are being
6780 passed. (See testcase execute/20021111-1.c for an example). So we also
6781 check the args_info.nregs field as well. The problem with this field is
6782 that it makes no allowances for arguments that are passed to the
6783 function but which are not used. Hence we could miss an opportunity
6784 when a function has an unused argument in r3. But it is better to be
6785 safe than to be sorry. */
6786 if (! cfun->machine->uses_anonymous_args
6787 && crtl->args.size >= 0
6788 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6789 && (TARGET_AAPCS_BASED
6790 ? crtl->args.info.aapcs_ncrn < 4
6791 : crtl->args.info.nregs < 4))
6792 return LAST_ARG_REGNUM;
6794 /* Otherwise look for a call-saved register that is going to be pushed. */
6795 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6796 if (pushed_regs_mask & (1 << reg))
6797 return reg;
6799 if (TARGET_THUMB2)
6801 /* Thumb-2 can use high regs. */
6802 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6803 if (pushed_regs_mask & (1 << reg))
6804 return reg;
6806 /* Something went wrong - thumb_compute_save_reg_mask()
6807 should have arranged for a suitable register to be pushed. */
6808 gcc_unreachable ();
6811 static GTY(()) int pic_labelno;
6813 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6814 low register. */
6816 void
6817 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6819 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6821 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6822 return;
6824 gcc_assert (flag_pic);
6826 pic_reg = cfun->machine->pic_reg;
6827 if (TARGET_VXWORKS_RTP)
6829 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6830 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6831 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6833 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6835 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6836 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6838 else
6840 /* We use an UNSPEC rather than a LABEL_REF because this label
6841 never appears in the code stream. */
6843 labelno = GEN_INT (pic_labelno++);
6844 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6845 l1 = gen_rtx_CONST (VOIDmode, l1);
6847 /* On the ARM the PC register contains 'dot + 8' at the time of the
6848 addition, on the Thumb it is 'dot + 4'. */
6849 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6850 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6851 UNSPEC_GOTSYM_OFF);
6852 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6854 if (TARGET_32BIT)
6856 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6858 else /* TARGET_THUMB1 */
6860 if (arm_pic_register != INVALID_REGNUM
6861 && REGNO (pic_reg) > LAST_LO_REGNUM)
6863 /* We will have pushed the pic register, so we should always be
6864 able to find a work register. */
6865 pic_tmp = gen_rtx_REG (SImode,
6866 thumb_find_work_register (saved_regs));
6867 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6868 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6869 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6871 else if (arm_pic_register != INVALID_REGNUM
6872 && arm_pic_register > LAST_LO_REGNUM
6873 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6875 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6876 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6877 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6879 else
6880 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6884 /* Need to emit this whether or not we obey regdecls,
6885 since setjmp/longjmp can cause life info to screw up. */
6886 emit_use (pic_reg);
6889 /* Generate code to load the address of a static var when flag_pic is set. */
6890 static rtx
6891 arm_pic_static_addr (rtx orig, rtx reg)
6893 rtx l1, labelno, offset_rtx, insn;
6895 gcc_assert (flag_pic);
6897 /* We use an UNSPEC rather than a LABEL_REF because this label
6898 never appears in the code stream. */
6899 labelno = GEN_INT (pic_labelno++);
6900 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6901 l1 = gen_rtx_CONST (VOIDmode, l1);
6903 /* On the ARM the PC register contains 'dot + 8' at the time of the
6904 addition, on the Thumb it is 'dot + 4'. */
6905 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6906 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6907 UNSPEC_SYMBOL_OFFSET);
6908 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6910 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6911 return insn;
6914 /* Return nonzero if X is valid as an ARM state addressing register. */
6915 static int
6916 arm_address_register_rtx_p (rtx x, int strict_p)
6918 int regno;
6920 if (!REG_P (x))
6921 return 0;
6923 regno = REGNO (x);
6925 if (strict_p)
6926 return ARM_REGNO_OK_FOR_BASE_P (regno);
6928 return (regno <= LAST_ARM_REGNUM
6929 || regno >= FIRST_PSEUDO_REGISTER
6930 || regno == FRAME_POINTER_REGNUM
6931 || regno == ARG_POINTER_REGNUM);
6934 /* Return TRUE if this rtx is the difference of a symbol and a label,
6935 and will reduce to a PC-relative relocation in the object file.
6936 Expressions like this can be left alone when generating PIC, rather
6937 than forced through the GOT. */
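/* e.g. (minus (symbol_ref "sym") (label_ref L42)) is such a difference
   and resolves to a PC-relative offset at assembly time.  */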
6938 static int
6939 pcrel_constant_p (rtx x)
6941 if (GET_CODE (x) == MINUS)
6942 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6944 return FALSE;
6947 /* Return true if X will surely end up in an index register after next
6948 splitting pass. */
6949 static bool
6950 will_be_in_index_register (const_rtx x)
6952 /* arm.md: calculate_pic_address will split this into a register. */
6953 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6956 /* Return nonzero if X is a valid ARM state address operand. */
6958 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6959 int strict_p)
6961 bool use_ldrd;
6962 enum rtx_code code = GET_CODE (x);
6964 if (arm_address_register_rtx_p (x, strict_p))
6965 return 1;
6967 use_ldrd = (TARGET_LDRD
6968 && (mode == DImode
6969 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6971 if (code == POST_INC || code == PRE_DEC
6972 || ((code == PRE_INC || code == POST_DEC)
6973 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6974 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6976 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6977 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6978 && GET_CODE (XEXP (x, 1)) == PLUS
6979 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6981 rtx addend = XEXP (XEXP (x, 1), 1);
6983 /* Don't allow ldrd post increment by register because it's hard
6984 to fixup invalid register choices. */
6985 if (use_ldrd
6986 && GET_CODE (x) == POST_MODIFY
6987 && REG_P (addend))
6988 return 0;
6990 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6991 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6994 /* After reload constants split into minipools will have addresses
6995 from a LABEL_REF. */
6996 else if (reload_completed
6997 && (code == LABEL_REF
6998 || (code == CONST
6999 && GET_CODE (XEXP (x, 0)) == PLUS
7000 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7001 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7002 return 1;
7004 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7005 return 0;
7007 else if (code == PLUS)
7009 rtx xop0 = XEXP (x, 0);
7010 rtx xop1 = XEXP (x, 1);
7012 return ((arm_address_register_rtx_p (xop0, strict_p)
7013 && ((CONST_INT_P (xop1)
7014 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7015 || (!strict_p && will_be_in_index_register (xop1))))
7016 || (arm_address_register_rtx_p (xop1, strict_p)
7017 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7020 #if 0
7021 /* Reload currently can't handle MINUS, so disable this for now */
7022 else if (GET_CODE (x) == MINUS)
7024 rtx xop0 = XEXP (x, 0);
7025 rtx xop1 = XEXP (x, 1);
7027 return (arm_address_register_rtx_p (xop0, strict_p)
7028 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7030 #endif
7032 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7033 && code == SYMBOL_REF
7034 && CONSTANT_POOL_ADDRESS_P (x)
7035 && ! (flag_pic
7036 && symbol_mentioned_p (get_pool_constant (x))
7037 && ! pcrel_constant_p (get_pool_constant (x))))
7038 return 1;
7040 return 0;
7043 /* Return nonzero if X is a valid Thumb-2 address operand. */
7044 static int
7045 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7047 bool use_ldrd;
7048 enum rtx_code code = GET_CODE (x);
7050 if (arm_address_register_rtx_p (x, strict_p))
7051 return 1;
7053 use_ldrd = (TARGET_LDRD
7054 && (mode == DImode
7055 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7057 if (code == POST_INC || code == PRE_DEC
7058 || ((code == PRE_INC || code == POST_DEC)
7059 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7060 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7062 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7063 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7064 && GET_CODE (XEXP (x, 1)) == PLUS
7065 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7067 /* Thumb-2 only has autoincrement by constant. */
7068 rtx addend = XEXP (XEXP (x, 1), 1);
7069 HOST_WIDE_INT offset;
7071 if (!CONST_INT_P (addend))
7072 return 0;
7074 offset = INTVAL(addend);
7075 if (GET_MODE_SIZE (mode) <= 4)
7076 return (offset > -256 && offset < 256);
7078 return (use_ldrd && offset > -1024 && offset < 1024
7079 && (offset & 3) == 0);
7082 /* After reload constants split into minipools will have addresses
7083 from a LABEL_REF. */
7084 else if (reload_completed
7085 && (code == LABEL_REF
7086 || (code == CONST
7087 && GET_CODE (XEXP (x, 0)) == PLUS
7088 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7089 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7090 return 1;
7092 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7093 return 0;
7095 else if (code == PLUS)
7097 rtx xop0 = XEXP (x, 0);
7098 rtx xop1 = XEXP (x, 1);
7100 return ((arm_address_register_rtx_p (xop0, strict_p)
7101 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7102 || (!strict_p && will_be_in_index_register (xop1))))
7103 || (arm_address_register_rtx_p (xop1, strict_p)
7104 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7107 /* Normally we can assign constant values to target registers without
7108 the help of the constant pool. But there are cases where we have to
7109 use the constant pool, such as:
7110 1) assigning a label to a register.
7111 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7113 Constant pool access in format:
7114 (set (reg r0) (mem (symbol_ref (".LC0"))))
7115 will cause the use of literal pool (later in function arm_reorg).
7116 So here we mark such format as an invalid format, then the compiler
7117 will adjust it into:
7118 (set (reg r0) (symbol_ref (".LC0")))
7119 (set (reg r0) (mem (reg r0))).
7120 No extra register is required, and (mem (reg r0)) won't cause the use
7121 of literal pools. */
7122 else if (arm_disable_literal_pool && code == SYMBOL_REF
7123 && CONSTANT_POOL_ADDRESS_P (x))
7124 return 0;
7126 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7127 && code == SYMBOL_REF
7128 && CONSTANT_POOL_ADDRESS_P (x)
7129 && ! (flag_pic
7130 && symbol_mentioned_p (get_pool_constant (x))
7131 && ! pcrel_constant_p (get_pool_constant (x))))
7132 return 1;
7134 return 0;
7137 /* Return nonzero if INDEX is valid for an address index operand in
7138 ARM state. */
7139 static int
7140 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7141 int strict_p)
7143 HOST_WIDE_INT range;
7144 enum rtx_code code = GET_CODE (index);
7146 /* Standard coprocessor addressing modes. */
7147 if (TARGET_HARD_FLOAT
7148 && TARGET_VFP
7149 && (mode == SFmode || mode == DFmode))
7150 return (code == CONST_INT && INTVAL (index) < 1024
7151 && INTVAL (index) > -1024
7152 && (INTVAL (index) & 3) == 0);
7154 /* For quad modes, we restrict the constant offset to be slightly less
7155 than what the instruction format permits. We do this because for
7156 quad mode moves, we will actually decompose them into two separate
7157 double-mode reads or writes. INDEX must therefore be a valid
7158 (double-mode) offset and so should INDEX+8. */
7159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7160 return (code == CONST_INT
7161 && INTVAL (index) < 1016
7162 && INTVAL (index) > -1024
7163 && (INTVAL (index) & 3) == 0);
7165 /* We have no such constraint on double mode offsets, so we permit the
7166 full range of the instruction format. */
7167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7168 return (code == CONST_INT
7169 && INTVAL (index) < 1024
7170 && INTVAL (index) > -1024
7171 && (INTVAL (index) & 3) == 0);
7173 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7174 return (code == CONST_INT
7175 && INTVAL (index) < 1024
7176 && INTVAL (index) > -1024
7177 && (INTVAL (index) & 3) == 0);
7179 if (arm_address_register_rtx_p (index, strict_p)
7180 && (GET_MODE_SIZE (mode) <= 4))
7181 return 1;
7183 if (mode == DImode || mode == DFmode)
7185 if (code == CONST_INT)
7187 HOST_WIDE_INT val = INTVAL (index);
7189 if (TARGET_LDRD)
7190 return val > -256 && val < 256;
7191 else
7192 return val > -4096 && val < 4092;
7195 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7198 if (GET_MODE_SIZE (mode) <= 4
7199 && ! (arm_arch4
7200 && (mode == HImode
7201 || mode == HFmode
7202 || (mode == QImode && outer == SIGN_EXTEND))))
7204 if (code == MULT)
7206 rtx xiop0 = XEXP (index, 0);
7207 rtx xiop1 = XEXP (index, 1);
7209 return ((arm_address_register_rtx_p (xiop0, strict_p)
7210 && power_of_two_operand (xiop1, SImode))
7211 || (arm_address_register_rtx_p (xiop1, strict_p)
7212 && power_of_two_operand (xiop0, SImode)));
7214 else if (code == LSHIFTRT || code == ASHIFTRT
7215 || code == ASHIFT || code == ROTATERT)
7217 rtx op = XEXP (index, 1);
7219 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7220 && CONST_INT_P (op)
7221 && INTVAL (op) > 0
7222 && INTVAL (op) <= 31);
7226 /* For ARM v4 we may be doing a sign-extend operation during the
7227 load. */
7228 if (arm_arch4)
7230 if (mode == HImode
7231 || mode == HFmode
7232 || (outer == SIGN_EXTEND && mode == QImode))
7233 range = 256;
7234 else
7235 range = 4096;
7237 else
7238 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7240 return (code == CONST_INT
7241 && INTVAL (index) < range
7242 && INTVAL (index) > -range);
7245 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7246 index operand, i.e. 1, 2, 4 or 8. */
7247 static bool
7248 thumb2_index_mul_operand (rtx op)
7250 HOST_WIDE_INT val;
7252 if (!CONST_INT_P (op))
7253 return false;
7255 val = INTVAL(op);
7256 return (val == 1 || val == 2 || val == 4 || val == 8);
7259 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7260 static int
7261 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7263 enum rtx_code code = GET_CODE (index);
7265 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7266 /* Standard coprocessor addressing modes. */
7267 if (TARGET_HARD_FLOAT
7268 && TARGET_VFP
7269 && (mode == SFmode || mode == DFmode))
7270 return (code == CONST_INT && INTVAL (index) < 1024
7271 /* Thumb-2 allows only > -256 index range for its core register
7272 load/stores. Since we allow SF/DF in core registers, we have
7273 to use the intersection between -256~4096 (core) and -1024~1024
7274 (coprocessor). */
7275 && INTVAL (index) > -256
7276 && (INTVAL (index) & 3) == 0);
7278 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7280 /* For DImode assume values will usually live in core regs
7281 and only allow LDRD addressing modes. */
7282 if (!TARGET_LDRD || mode != DImode)
7283 return (code == CONST_INT
7284 && INTVAL (index) < 1024
7285 && INTVAL (index) > -1024
7286 && (INTVAL (index) & 3) == 0);
7289 /* For quad modes, we restrict the constant offset to be slightly less
7290 than what the instruction format permits. We do this because for
7291 quad mode moves, we will actually decompose them into two separate
7292 double-mode reads or writes. INDEX must therefore be a valid
7293 (double-mode) offset and so should INDEX+8. */
7294 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7295 return (code == CONST_INT
7296 && INTVAL (index) < 1016
7297 && INTVAL (index) > -1024
7298 && (INTVAL (index) & 3) == 0);
7300 /* We have no such constraint on double mode offsets, so we permit the
7301 full range of the instruction format. */
7302 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7303 return (code == CONST_INT
7304 && INTVAL (index) < 1024
7305 && INTVAL (index) > -1024
7306 && (INTVAL (index) & 3) == 0);
7308 if (arm_address_register_rtx_p (index, strict_p)
7309 && (GET_MODE_SIZE (mode) <= 4))
7310 return 1;
7312 if (mode == DImode || mode == DFmode)
7314 if (code == CONST_INT)
7316 HOST_WIDE_INT val = INTVAL (index);
7317 /* ??? Can we assume ldrd for thumb2? */
7318 /* Thumb-2 ldrd only has reg+const addressing modes. */
7319 /* ldrd supports offsets of +-1020.
7320 However the ldr fallback does not. */
7321 return val > -256 && val < 256 && (val & 3) == 0;
7323 else
7324 return 0;
7327 if (code == MULT)
7329 rtx xiop0 = XEXP (index, 0);
7330 rtx xiop1 = XEXP (index, 1);
7332 return ((arm_address_register_rtx_p (xiop0, strict_p)
7333 && thumb2_index_mul_operand (xiop1))
7334 || (arm_address_register_rtx_p (xiop1, strict_p)
7335 && thumb2_index_mul_operand (xiop0)));
7337 else if (code == ASHIFT)
7339 rtx op = XEXP (index, 1);
7341 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7342 && CONST_INT_P (op)
7343 && INTVAL (op) > 0
7344 && INTVAL (op) <= 3);
7347 return (code == CONST_INT
7348 && INTVAL (index) < 4096
7349 && INTVAL (index) > -256);
7352 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7353 static int
7354 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7356 int regno;
7358 if (!REG_P (x))
7359 return 0;
7361 regno = REGNO (x);
7363 if (strict_p)
7364 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7366 return (regno <= LAST_LO_REGNUM
7367 || regno > LAST_VIRTUAL_REGISTER
7368 || regno == FRAME_POINTER_REGNUM
7369 || (GET_MODE_SIZE (mode) >= 4
7370 && (regno == STACK_POINTER_REGNUM
7371 || regno >= FIRST_PSEUDO_REGISTER
7372 || x == hard_frame_pointer_rtx
7373 || x == arg_pointer_rtx)));
7376 /* Return nonzero if x is a legitimate index register. This is the case
7377 for any base register that can access a QImode object. */
7378 inline static int
7379 thumb1_index_register_rtx_p (rtx x, int strict_p)
7381 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7384 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7386 The AP may be eliminated to either the SP or the FP, so we use the
7387 least common denominator, i.e. SImode, and offsets from 0 to 64.
7389 ??? Verify whether the above is the right approach.
7391 ??? Also, the FP may be eliminated to the SP, so perhaps that
7392 needs special handling also.
7394 ??? Look at how the mips16 port solves this problem. It probably uses
7395 better ways to solve some of these problems.
7397 Although it is not incorrect, we don't accept QImode and HImode
7398 addresses based on the frame pointer or arg pointer until the
7399 reload pass starts. This is so that eliminating such addresses
7400 into stack based ones won't produce impossible code. */
7401 int
7402 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7404 /* ??? Not clear if this is right. Experiment. */
7405 if (GET_MODE_SIZE (mode) < 4
7406 && !(reload_in_progress || reload_completed)
7407 && (reg_mentioned_p (frame_pointer_rtx, x)
7408 || reg_mentioned_p (arg_pointer_rtx, x)
7409 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7410 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7411 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7412 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7413 return 0;
7415 /* Accept any base register. SP only in SImode or larger. */
7416 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7417 return 1;
7419 /* This is PC relative data before arm_reorg runs. */
7420 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7421 && GET_CODE (x) == SYMBOL_REF
7422 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7423 return 1;
7425 /* This is PC relative data after arm_reorg runs. */
7426 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7427 && reload_completed
7428 && (GET_CODE (x) == LABEL_REF
7429 || (GET_CODE (x) == CONST
7430 && GET_CODE (XEXP (x, 0)) == PLUS
7431 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7432 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7433 return 1;
7435 /* Post-inc indexing only supported for SImode and larger. */
7436 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7437 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7438 return 1;
7440 else if (GET_CODE (x) == PLUS)
7442 /* REG+REG address can be any two index registers. */
7443 /* We disallow FRAME+REG addressing since we know that FRAME
7444 will be replaced with STACK, and SP relative addressing only
7445 permits SP+OFFSET. */
7446 if (GET_MODE_SIZE (mode) <= 4
7447 && XEXP (x, 0) != frame_pointer_rtx
7448 && XEXP (x, 1) != frame_pointer_rtx
7449 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7450 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7451 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7452 return 1;
7454 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7455 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7456 || XEXP (x, 0) == arg_pointer_rtx)
7457 && CONST_INT_P (XEXP (x, 1))
7458 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7459 return 1;
7461 /* REG+const has a 10-bit offset for SP, but only SImode and
7462 larger are supported. */
7463 /* ??? Should probably check for DI/DFmode overflow here
7464 just like GO_IF_LEGITIMATE_OFFSET does. */
7465 else if (REG_P (XEXP (x, 0))
7466 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7467 && GET_MODE_SIZE (mode) >= 4
7468 && CONST_INT_P (XEXP (x, 1))
7469 && INTVAL (XEXP (x, 1)) >= 0
7470 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7471 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7472 return 1;
7474 else if (REG_P (XEXP (x, 0))
7475 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7476 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7477 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7478 && REGNO (XEXP (x, 0))
7479 <= LAST_VIRTUAL_POINTER_REGISTER))
7480 && GET_MODE_SIZE (mode) >= 4
7481 && CONST_INT_P (XEXP (x, 1))
7482 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7483 return 1;
7486 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7487 && GET_MODE_SIZE (mode) == 4
7488 && GET_CODE (x) == SYMBOL_REF
7489 && CONSTANT_POOL_ADDRESS_P (x)
7490 && ! (flag_pic
7491 && symbol_mentioned_p (get_pool_constant (x))
7492 && ! pcrel_constant_p (get_pool_constant (x))))
7493 return 1;
7495 return 0;
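A condensed editorial recap (inferred from the code above, not a quotation from the source) of the Thumb-1 address forms accepted:

/* - any valid base register (SP only for SImode or wider);
   - PC-relative constant pool or label references, for 4-byte or wider
     accesses (plus HFmode after reload);
   - POST_INC of an index register, for SImode or wider;
   - REG + REG, for accesses of at most 4 bytes;
   - REG + small scaled constant, per thumb_legitimate_offset_p below;
   - SP/FP/AP/virtual pointer + word-aligned constant, SImode or wider.  */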
7498 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7499 instruction of mode MODE. */
7500 int
7501 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7503 switch (GET_MODE_SIZE (mode))
7505 case 1:
7506 return val >= 0 && val < 32;
7508 case 2:
7509 return val >= 0 && val < 64 && (val & 1) == 0;
7511 default:
7512 return (val >= 0
7513 && (val + GET_MODE_SIZE (mode)) <= 128
7514 && (val & 3) == 0);
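A hedged illustration (not part of arm.c; the macro below is a hypothetical helper, not a GCC macro): the limits in the switch above correspond to the Thumb-1 5-bit scaled immediate offsets.

/* Largest offset accepted for a given access size: byte 0..31, halfword
   0..62 (even), otherwise 0..(128 - SIZE) in multiples of 4, e.g. 124
   for SImode and 120 for DImode.  */
#define EXAMPLE_THUMB1_MAX_OFFSET(SIZE) \
  ((SIZE) == 1 ? 31 : (SIZE) == 2 ? 62 : 128 - (SIZE))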
7518 bool
7519 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7521 if (TARGET_ARM)
7522 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7523 else if (TARGET_THUMB2)
7524 return thumb2_legitimate_address_p (mode, x, strict_p);
7525 else /* if (TARGET_THUMB1) */
7526 return thumb1_legitimate_address_p (mode, x, strict_p);
7529 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7531 Given an rtx X being reloaded into a reg required to be
7532 in class CLASS, return the class of reg to actually use.
7533 In general this is just CLASS, but for the Thumb core registers and
7534 immediate constants we prefer a LO_REGS class or a subset. */
7536 static reg_class_t
7537 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7539 if (TARGET_32BIT)
7540 return rclass;
7541 else
7543 if (rclass == GENERAL_REGS)
7544 return LO_REGS;
7545 else
7546 return rclass;
7550 /* Build the SYMBOL_REF for __tls_get_addr. */
7552 static GTY(()) rtx tls_get_addr_libfunc;
7554 static rtx
7555 get_tls_get_addr (void)
7557 if (!tls_get_addr_libfunc)
7558 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7559 return tls_get_addr_libfunc;
7563 arm_load_tp (rtx target)
7565 if (!target)
7566 target = gen_reg_rtx (SImode);
7568 if (TARGET_HARD_TP)
7570 /* Can return in any reg. */
7571 emit_insn (gen_load_tp_hard (target));
7573 else
7575 /* Always returned in r0. Immediately copy the result into a pseudo,
7576 otherwise other uses of r0 (e.g. setting up function arguments) may
7577 clobber the value. */
7579 rtx tmp;
7581 emit_insn (gen_load_tp_soft ());
7583 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7584 emit_move_insn (target, tmp);
7586 return target;
7589 static rtx
7590 load_tls_operand (rtx x, rtx reg)
7592 rtx tmp;
7594 if (reg == NULL_RTX)
7595 reg = gen_reg_rtx (SImode);
7597 tmp = gen_rtx_CONST (SImode, x);
7599 emit_move_insn (reg, tmp);
7601 return reg;
7604 static rtx
7605 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7607 rtx insns, label, labelno, sum;
7609 gcc_assert (reloc != TLS_DESCSEQ);
7610 start_sequence ();
7612 labelno = GEN_INT (pic_labelno++);
7613 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7614 label = gen_rtx_CONST (VOIDmode, label);
7616 sum = gen_rtx_UNSPEC (Pmode,
7617 gen_rtvec (4, x, GEN_INT (reloc), label,
7618 GEN_INT (TARGET_ARM ? 8 : 4)),
7619 UNSPEC_TLS);
7620 reg = load_tls_operand (sum, reg);
7622 if (TARGET_ARM)
7623 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7624 else
7625 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7627 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7628 LCT_PURE, /* LCT_CONST? */
7629 Pmode, 1, reg, Pmode);
7631 insns = get_insns ();
7632 end_sequence ();
7634 return insns;
7637 static rtx
7638 arm_tls_descseq_addr (rtx x, rtx reg)
7640 rtx labelno = GEN_INT (pic_labelno++);
7641 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7642 rtx sum = gen_rtx_UNSPEC (Pmode,
7643 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7644 gen_rtx_CONST (VOIDmode, label),
7645 GEN_INT (!TARGET_ARM)),
7646 UNSPEC_TLS);
7647 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7649 emit_insn (gen_tlscall (x, labelno));
7650 if (!reg)
7651 reg = gen_reg_rtx (SImode);
7652 else
7653 gcc_assert (REGNO (reg) != R0_REGNUM);
7655 emit_move_insn (reg, reg0);
7657 return reg;
7660 rtx
7661 legitimize_tls_address (rtx x, rtx reg)
7663 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7664 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7666 switch (model)
7668 case TLS_MODEL_GLOBAL_DYNAMIC:
7669 if (TARGET_GNU2_TLS)
7671 reg = arm_tls_descseq_addr (x, reg);
7673 tp = arm_load_tp (NULL_RTX);
7675 dest = gen_rtx_PLUS (Pmode, tp, reg);
7677 else
7679 /* Original scheme */
7680 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7681 dest = gen_reg_rtx (Pmode);
7682 emit_libcall_block (insns, dest, ret, x);
7684 return dest;
7686 case TLS_MODEL_LOCAL_DYNAMIC:
7687 if (TARGET_GNU2_TLS)
7689 reg = arm_tls_descseq_addr (x, reg);
7691 tp = arm_load_tp (NULL_RTX);
7693 dest = gen_rtx_PLUS (Pmode, tp, reg);
7695 else
7697 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7699 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7700 share the LDM result with other LD model accesses. */
7701 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7702 UNSPEC_TLS);
7703 dest = gen_reg_rtx (Pmode);
7704 emit_libcall_block (insns, dest, ret, eqv);
7706 /* Load the addend. */
7707 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7708 GEN_INT (TLS_LDO32)),
7709 UNSPEC_TLS);
7710 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7711 dest = gen_rtx_PLUS (Pmode, dest, addend);
7713 return dest;
7715 case TLS_MODEL_INITIAL_EXEC:
7716 labelno = GEN_INT (pic_labelno++);
7717 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7718 label = gen_rtx_CONST (VOIDmode, label);
7719 sum = gen_rtx_UNSPEC (Pmode,
7720 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7721 GEN_INT (TARGET_ARM ? 8 : 4)),
7722 UNSPEC_TLS);
7723 reg = load_tls_operand (sum, reg);
7725 if (TARGET_ARM)
7726 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7727 else if (TARGET_THUMB2)
7728 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7729 else
7731 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7732 emit_move_insn (reg, gen_const_mem (SImode, reg));
7735 tp = arm_load_tp (NULL_RTX);
7737 return gen_rtx_PLUS (Pmode, tp, reg);
7739 case TLS_MODEL_LOCAL_EXEC:
7740 tp = arm_load_tp (NULL_RTX);
7742 reg = gen_rtx_UNSPEC (Pmode,
7743 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7744 UNSPEC_TLS);
7745 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7747 return gen_rtx_PLUS (Pmode, tp, reg);
7749 default:
7750 abort ();
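For readers unfamiliar with the TLS access models, here is an editorial summary (an inference from the code above, not a comment from the source) of what the generated sequences compute:

/* - Global/local dynamic with TARGET_GNU2_TLS: run the TLS descriptor
     sequence (tlscall) and add the thread pointer to its result.
   - Global dynamic (classic): call __tls_get_addr on a TLS_GD32 operand;
     the call returns the variable's address directly.
   - Local dynamic (classic): call __tls_get_addr once per module
     (TLS_LDM32), shared between accesses via the libcall block, then add
     the per-symbol TLS_LDO32 addend.
   - Initial exec: load the thread-pointer-relative offset from the GOT
     (TLS_IE32) and add the thread pointer.
   - Local exec: add the link-time TLS_LE32 offset to the thread pointer.  */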
7754 /* Try machine-dependent ways of modifying an illegitimate address
7755 to be legitimate. If we find one, return the new, valid address. */
7756 rtx
7757 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7759 if (arm_tls_referenced_p (x))
7761 rtx addend = NULL;
7763 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7765 addend = XEXP (XEXP (x, 0), 1);
7766 x = XEXP (XEXP (x, 0), 0);
7769 if (GET_CODE (x) != SYMBOL_REF)
7770 return x;
7772 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7774 x = legitimize_tls_address (x, NULL_RTX);
7776 if (addend)
7778 x = gen_rtx_PLUS (SImode, x, addend);
7779 orig_x = x;
7781 else
7782 return x;
7785 if (!TARGET_ARM)
7787 /* TODO: legitimize_address for Thumb2. */
7788 if (TARGET_THUMB2)
7789 return x;
7790 return thumb_legitimize_address (x, orig_x, mode);
7793 if (GET_CODE (x) == PLUS)
7795 rtx xop0 = XEXP (x, 0);
7796 rtx xop1 = XEXP (x, 1);
7798 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7799 xop0 = force_reg (SImode, xop0);
7801 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7802 && !symbol_mentioned_p (xop1))
7803 xop1 = force_reg (SImode, xop1);
7805 if (ARM_BASE_REGISTER_RTX_P (xop0)
7806 && CONST_INT_P (xop1))
7808 HOST_WIDE_INT n, low_n;
7809 rtx base_reg, val;
7810 n = INTVAL (xop1);
7812 /* VFP addressing modes actually allow greater offsets, but for
7813 now we just stick with the lowest common denominator. */
7814 if (mode == DImode
7815 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7817 low_n = n & 0x0f;
7818 n &= ~0x0f;
7819 if (low_n > 4)
7821 n += 16;
7822 low_n -= 16;
7825 else
7827 low_n = ((mode) == TImode ? 0
7828 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7829 n -= low_n;
7832 base_reg = gen_reg_rtx (SImode);
7833 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7834 emit_move_insn (base_reg, val);
7835 x = plus_constant (Pmode, base_reg, low_n);
7837 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7838 x = gen_rtx_PLUS (SImode, xop0, xop1);
7841 /* XXX We don't allow MINUS any more -- see comment in
7842 arm_legitimate_address_outer_p (). */
7843 else if (GET_CODE (x) == MINUS)
7845 rtx xop0 = XEXP (x, 0);
7846 rtx xop1 = XEXP (x, 1);
7848 if (CONSTANT_P (xop0))
7849 xop0 = force_reg (SImode, xop0);
7851 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7852 xop1 = force_reg (SImode, xop1);
7854 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7855 x = gen_rtx_MINUS (SImode, xop0, xop1);
7858 /* Make sure to take full advantage of the pre-indexed addressing mode
7859 with absolute addresses, which often allows the base register to be
7860 factored out across multiple adjacent memory references and may even
7861 allow the minipool to be avoided entirely. */
7862 else if (CONST_INT_P (x) && optimize > 0)
7864 unsigned int bits;
7865 HOST_WIDE_INT mask, base, index;
7866 rtx base_reg;
7868 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7869 use an 8-bit index. So let's use a 12-bit index for SImode only and
7870 hope that arm_gen_constant will enable ldrb to use more bits. */
7871 bits = (mode == SImode) ? 12 : 8;
7872 mask = (1 << bits) - 1;
7873 base = INTVAL (x) & ~mask;
7874 index = INTVAL (x) & mask;
7875 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7877 /* It'll most probably be more efficient to generate the base
7878 with more bits set and use a negative index instead. */
7879 base |= mask;
7880 index -= mask;
7882 base_reg = force_reg (SImode, GEN_INT (base));
7883 x = plus_constant (Pmode, base_reg, index);
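The base/index split above can be restated as a standalone sketch (editorial, not part of arm.c; the function name is hypothetical, although bit_count is this file's own helper). For example, with 12 offset bits, 0x0ffffff4 first splits into base 0x0ffff000 + index 0xff4; since that base has 16 bits set, it is rewritten as base 0x0fffffff (a cheap MVN-style constant) with index -11.

static void
example_split_absolute_address (HOST_WIDE_INT x, unsigned int bits,
                                HOST_WIDE_INT *base, HOST_WIDE_INT *index)
{
  HOST_WIDE_INT mask = (1 << bits) - 1;

  *base = x & ~mask;    /* High part, to be loaded into a register.  */
  *index = x & mask;    /* Low part, used as the addressing offset.  */
  if (bit_count (*base & 0xffffffff) > (32 - bits) / 2)
    {
      /* A base with more bits set is often cheaper to synthesize;
         compensate with a negative index so base + index is unchanged.  */
      *base |= mask;
      *index -= mask;
    }
}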
7886 if (flag_pic)
7888 /* We need to find and carefully transform any SYMBOL and LABEL
7889 references; so go back to the original address expression. */
7890 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7892 if (new_x != orig_x)
7893 x = new_x;
7896 return x;
7900 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7901 to be legitimate. If we find one, return the new, valid address. */
7902 rtx
7903 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7905 if (GET_CODE (x) == PLUS
7906 && CONST_INT_P (XEXP (x, 1))
7907 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7908 || INTVAL (XEXP (x, 1)) < 0))
7910 rtx xop0 = XEXP (x, 0);
7911 rtx xop1 = XEXP (x, 1);
7912 HOST_WIDE_INT offset = INTVAL (xop1);
7914 /* Try and fold the offset into a biasing of the base register and
7915 then offsetting that. Don't do this when optimizing for space
7916 since it can cause too many CSEs. */
7917 if (optimize_size && offset >= 0
7918 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7920 HOST_WIDE_INT delta;
7922 if (offset >= 256)
7923 delta = offset - (256 - GET_MODE_SIZE (mode));
7924 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7925 delta = 31 * GET_MODE_SIZE (mode);
7926 else
7927 delta = offset & (~31 * GET_MODE_SIZE (mode));
7929 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7930 NULL_RTX);
7931 x = plus_constant (Pmode, xop0, delta);
7933 else if (offset < 0 && offset > -256)
7934 /* Small negative offsets are best done with a subtract before the
7935 dereference, since forcing these into a register normally takes two
7936 instructions. */
7937 x = force_operand (x, NULL_RTX);
7938 else
7940 /* For the remaining cases, force the constant into a register. */
7941 xop1 = force_reg (SImode, xop1);
7942 x = gen_rtx_PLUS (SImode, xop0, xop1);
7945 else if (GET_CODE (x) == PLUS
7946 && s_register_operand (XEXP (x, 1), SImode)
7947 && !s_register_operand (XEXP (x, 0), SImode))
7949 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7951 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7954 if (flag_pic)
7956 /* We need to find and carefully transform any SYMBOL and LABEL
7957 references; so go back to the original address expression. */
7958 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7960 if (new_x != orig_x)
7961 x = new_x;
7964 return x;
7967 /* Return TRUE if X contains any TLS symbol references. */
7969 bool
7970 arm_tls_referenced_p (rtx x)
7972 if (! TARGET_HAVE_TLS)
7973 return false;
7975 subrtx_iterator::array_type array;
7976 FOR_EACH_SUBRTX (iter, array, x, ALL)
7978 const_rtx x = *iter;
7979 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
7980 return true;
7982 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7983 TLS offsets, not real symbol references. */
7984 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7985 iter.skip_subrtxes ();
7987 return false;
7990 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7992 On the ARM, allow any integer (invalid ones are removed later by insn
7993 patterns), nice doubles and symbol_refs which refer to the function's
7994 constant pool XXX.
7996 When generating PIC, allow anything. */
7998 static bool
7999 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8001 return flag_pic || !label_mentioned_p (x);
8004 static bool
8005 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8007 return (CONST_INT_P (x)
8008 || CONST_DOUBLE_P (x)
8009 || CONSTANT_ADDRESS_P (x)
8010 || flag_pic);
8013 static bool
8014 arm_legitimate_constant_p (machine_mode mode, rtx x)
8016 return (!arm_cannot_force_const_mem (mode, x)
8017 && (TARGET_32BIT
8018 ? arm_legitimate_constant_p_1 (mode, x)
8019 : thumb_legitimate_constant_p (mode, x)));
8022 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8024 static bool
8025 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8027 rtx base, offset;
8029 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8031 split_const (x, &base, &offset);
8032 if (GET_CODE (base) == SYMBOL_REF
8033 && !offset_within_block_p (base, INTVAL (offset)))
8034 return true;
8036 return arm_tls_referenced_p (x);
8039 #define REG_OR_SUBREG_REG(X) \
8040 (REG_P (X) \
8041 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8043 #define REG_OR_SUBREG_RTX(X) \
8044 (REG_P (X) ? (X) : SUBREG_REG (X))
8046 static inline int
8047 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8049 machine_mode mode = GET_MODE (x);
8050 int total, words;
8052 switch (code)
8054 case ASHIFT:
8055 case ASHIFTRT:
8056 case LSHIFTRT:
8057 case ROTATERT:
8058 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8060 case PLUS:
8061 case MINUS:
8062 case COMPARE:
8063 case NEG:
8064 case NOT:
8065 return COSTS_N_INSNS (1);
8067 case MULT:
8068 if (CONST_INT_P (XEXP (x, 1)))
8070 int cycles = 0;
8071 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8073 while (i)
8075 i >>= 2;
8076 cycles++;
8078 return COSTS_N_INSNS (2) + cycles;
8080 return COSTS_N_INSNS (1) + 16;
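An editorial note (an inference, not stated in the source): the loop above appears to model an early-terminating multiplier that consumes two bits of the constant multiplier per cycle.

/* For example, a multiplier of 0x3c shifts 0x3c -> 0xf -> 0x3 -> 0 in
   three iterations, giving a cost of COSTS_N_INSNS (2) + 3.  */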
8082 case SET:
8083 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8084 the mode. */
8085 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8086 return (COSTS_N_INSNS (words)
8087 + 4 * ((MEM_P (SET_SRC (x)))
8088 + MEM_P (SET_DEST (x))));
8090 case CONST_INT:
8091 if (outer == SET)
8093 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8094 return 0;
8095 if (thumb_shiftable_const (INTVAL (x)))
8096 return COSTS_N_INSNS (2);
8097 return COSTS_N_INSNS (3);
8099 else if ((outer == PLUS || outer == COMPARE)
8100 && INTVAL (x) < 256 && INTVAL (x) > -256)
8101 return 0;
8102 else if ((outer == IOR || outer == XOR || outer == AND)
8103 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8104 return COSTS_N_INSNS (1);
8105 else if (outer == AND)
8107 int i;
8108 /* This duplicates the tests in the andsi3 expander. */
8109 for (i = 9; i <= 31; i++)
8110 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8111 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8112 return COSTS_N_INSNS (2);
8114 else if (outer == ASHIFT || outer == ASHIFTRT
8115 || outer == LSHIFTRT)
8116 return 0;
8117 return COSTS_N_INSNS (2);
8119 case CONST:
8120 case CONST_DOUBLE:
8121 case LABEL_REF:
8122 case SYMBOL_REF:
8123 return COSTS_N_INSNS (3);
8125 case UDIV:
8126 case UMOD:
8127 case DIV:
8128 case MOD:
8129 return 100;
8131 case TRUNCATE:
8132 return 99;
8134 case AND:
8135 case XOR:
8136 case IOR:
8137 /* XXX guess. */
8138 return 8;
8140 case MEM:
8141 /* XXX another guess. */
8142 /* Memory costs quite a lot for the first word, but subsequent words
8143 load at the equivalent of a single insn each. */
8144 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8145 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8146 ? 4 : 0));
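A quick worked example of the formula above (editorial, not from the source):

/* With 4-byte words, a DImode load costs 10 + 4 * ((8 - 1) / 4) = 14,
   or 18 if it reads from the constant pool.  */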
8148 case IF_THEN_ELSE:
8149 /* XXX a guess. */
8150 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8151 return 14;
8152 return 2;
8154 case SIGN_EXTEND:
8155 case ZERO_EXTEND:
8156 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8157 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8159 if (mode == SImode)
8160 return total;
8162 if (arm_arch6)
8163 return total + COSTS_N_INSNS (1);
8165 /* Assume a two-shift sequence. Increase the cost slightly so
8166 we prefer actual shifts over an extend operation. */
8167 return total + 1 + COSTS_N_INSNS (2);
8169 default:
8170 return 99;
8174 static inline bool
8175 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8177 machine_mode mode = GET_MODE (x);
8178 enum rtx_code subcode;
8179 rtx operand;
8180 enum rtx_code code = GET_CODE (x);
8181 *total = 0;
8183 switch (code)
8185 case MEM:
8186 /* Memory costs quite a lot for the first word, but subsequent words
8187 load at the equivalent of a single insn each. */
8188 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8189 return true;
8191 case DIV:
8192 case MOD:
8193 case UDIV:
8194 case UMOD:
8195 if (TARGET_HARD_FLOAT && mode == SFmode)
8196 *total = COSTS_N_INSNS (2);
8197 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8198 *total = COSTS_N_INSNS (4);
8199 else
8200 *total = COSTS_N_INSNS (20);
8201 return false;
8203 case ROTATE:
8204 if (REG_P (XEXP (x, 1)))
8205 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8206 else if (!CONST_INT_P (XEXP (x, 1)))
8207 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8209 /* Fall through */
8210 case ROTATERT:
8211 if (mode != SImode)
8213 *total += COSTS_N_INSNS (4);
8214 return true;
8217 /* Fall through */
8218 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8219 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8220 if (mode == DImode)
8222 *total += COSTS_N_INSNS (3);
8223 return true;
8226 *total += COSTS_N_INSNS (1);
8227 /* Increase the cost of complex shifts because they aren't any faster,
8228 and reduce dual issue opportunities. */
8229 if (arm_tune_cortex_a9
8230 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8231 ++*total;
8233 return true;
8235 case MINUS:
8236 if (mode == DImode)
8238 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8239 if (CONST_INT_P (XEXP (x, 0))
8240 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8242 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8243 return true;
8246 if (CONST_INT_P (XEXP (x, 1))
8247 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8249 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8250 return true;
8253 return false;
8256 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8258 if (TARGET_HARD_FLOAT
8259 && (mode == SFmode
8260 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8262 *total = COSTS_N_INSNS (1);
8263 if (CONST_DOUBLE_P (XEXP (x, 0))
8264 && arm_const_double_rtx (XEXP (x, 0)))
8266 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8267 return true;
8270 if (CONST_DOUBLE_P (XEXP (x, 1))
8271 && arm_const_double_rtx (XEXP (x, 1)))
8273 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8274 return true;
8277 return false;
8279 *total = COSTS_N_INSNS (20);
8280 return false;
8283 *total = COSTS_N_INSNS (1);
8284 if (CONST_INT_P (XEXP (x, 0))
8285 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8287 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8288 return true;
8291 subcode = GET_CODE (XEXP (x, 1));
8292 if (subcode == ASHIFT || subcode == ASHIFTRT
8293 || subcode == LSHIFTRT
8294 || subcode == ROTATE || subcode == ROTATERT)
8296 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8297 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8298 return true;
8301 /* A shift as a part of RSB costs no more than RSB itself. */
8302 if (GET_CODE (XEXP (x, 0)) == MULT
8303 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8305 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8306 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8307 return true;
8310 if (subcode == MULT
8311 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8313 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8314 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8315 return true;
8318 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8319 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8321 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8322 if (REG_P (XEXP (XEXP (x, 1), 0))
8323 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8324 *total += COSTS_N_INSNS (1);
8326 return true;
8329 /* Fall through */
8331 case PLUS:
8332 if (code == PLUS && arm_arch6 && mode == SImode
8333 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8334 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8336 *total = COSTS_N_INSNS (1);
8337 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8338 0, speed);
8339 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8340 return true;
8343 /* MLA: All arguments must be registers. We filter out
8344 multiplication by a power of two, so that we fall through to
8345 the code below. */
8346 if (GET_CODE (XEXP (x, 0)) == MULT
8347 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8349 /* The cost comes from the cost of the multiply. */
8350 return false;
8353 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8355 if (TARGET_HARD_FLOAT
8356 && (mode == SFmode
8357 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8359 *total = COSTS_N_INSNS (1);
8360 if (CONST_DOUBLE_P (XEXP (x, 1))
8361 && arm_const_double_rtx (XEXP (x, 1)))
8363 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8364 return true;
8367 return false;
8370 *total = COSTS_N_INSNS (20);
8371 return false;
8374 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8375 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8377 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8378 if (REG_P (XEXP (XEXP (x, 0), 0))
8379 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8380 *total += COSTS_N_INSNS (1);
8381 return true;
8384 /* Fall through */
8386 case AND: case XOR: case IOR:
8388 /* Normally the frame registers will be split into reg+const during
8389 reload, so it is a bad idea to combine them with other instructions,
8390 since then they might not be moved outside of loops. As a compromise
8391 we allow integration with ops that have a constant as their second
8392 operand. */
8393 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8394 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8395 && !CONST_INT_P (XEXP (x, 1)))
8396 *total = COSTS_N_INSNS (1);
8398 if (mode == DImode)
8400 *total += COSTS_N_INSNS (2);
8401 if (CONST_INT_P (XEXP (x, 1))
8402 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8404 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8405 return true;
8408 return false;
8411 *total += COSTS_N_INSNS (1);
8412 if (CONST_INT_P (XEXP (x, 1))
8413 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8415 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8416 return true;
8418 subcode = GET_CODE (XEXP (x, 0));
8419 if (subcode == ASHIFT || subcode == ASHIFTRT
8420 || subcode == LSHIFTRT
8421 || subcode == ROTATE || subcode == ROTATERT)
8423 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8424 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8425 return true;
8428 if (subcode == MULT
8429 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8431 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8433 return true;
8436 if (subcode == UMIN || subcode == UMAX
8437 || subcode == SMIN || subcode == SMAX)
8439 *total = COSTS_N_INSNS (3);
8440 return true;
8443 return false;
8445 case MULT:
8446 /* This should have been handled by the CPU specific routines. */
8447 gcc_unreachable ();
8449 case TRUNCATE:
8450 if (arm_arch3m && mode == SImode
8451 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8452 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8453 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8454 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8455 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8456 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8458 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8459 return true;
8461 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8462 return false;
8464 case NEG:
8465 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8467 if (TARGET_HARD_FLOAT
8468 && (mode == SFmode
8469 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8471 *total = COSTS_N_INSNS (1);
8472 return false;
8474 *total = COSTS_N_INSNS (2);
8475 return false;
8478 /* Fall through */
8479 case NOT:
8480 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8481 if (mode == SImode && code == NOT)
8483 subcode = GET_CODE (XEXP (x, 0));
8484 if (subcode == ASHIFT || subcode == ASHIFTRT
8485 || subcode == LSHIFTRT
8486 || subcode == ROTATE || subcode == ROTATERT
8487 || (subcode == MULT
8488 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8491 /* Register shifts cost an extra cycle. */
8492 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8493 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8494 subcode, 1, speed);
8495 return true;
8499 return false;
8501 case IF_THEN_ELSE:
8502 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8504 *total = COSTS_N_INSNS (4);
8505 return true;
8508 operand = XEXP (x, 0);
8510 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8511 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8512 && REG_P (XEXP (operand, 0))
8513 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8514 *total += COSTS_N_INSNS (1);
8515 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8516 + rtx_cost (XEXP (x, 2), code, 2, speed));
8517 return true;
8519 case NE:
8520 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8522 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8523 return true;
8525 goto scc_insn;
8527 case GE:
8528 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8529 && mode == SImode && XEXP (x, 1) == const0_rtx)
8531 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8532 return true;
8534 goto scc_insn;
8536 case LT:
8537 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8538 && mode == SImode && XEXP (x, 1) == const0_rtx)
8540 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8541 return true;
8543 goto scc_insn;
8545 case EQ:
8546 case GT:
8547 case LE:
8548 case GEU:
8549 case LTU:
8550 case GTU:
8551 case LEU:
8552 case UNORDERED:
8553 case ORDERED:
8554 case UNEQ:
8555 case UNGE:
8556 case UNLT:
8557 case UNGT:
8558 case UNLE:
8559 scc_insn:
8560 /* SCC insns. In the case where the comparison has already been
8561 performed, then they cost 2 instructions. Otherwise they need
8562 an additional comparison before them. */
8563 *total = COSTS_N_INSNS (2);
8564 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8566 return true;
8569 /* Fall through */
8570 case COMPARE:
8571 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8573 *total = 0;
8574 return true;
8577 *total += COSTS_N_INSNS (1);
8578 if (CONST_INT_P (XEXP (x, 1))
8579 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8581 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8582 return true;
8585 subcode = GET_CODE (XEXP (x, 0));
8586 if (subcode == ASHIFT || subcode == ASHIFTRT
8587 || subcode == LSHIFTRT
8588 || subcode == ROTATE || subcode == ROTATERT)
8590 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8592 return true;
8595 if (subcode == MULT
8596 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8598 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8599 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8600 return true;
8603 return false;
8605 case UMIN:
8606 case UMAX:
8607 case SMIN:
8608 case SMAX:
8609 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8610 if (!CONST_INT_P (XEXP (x, 1))
8611 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8612 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8613 return true;
8615 case ABS:
8616 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8618 if (TARGET_HARD_FLOAT
8619 && (mode == SFmode
8620 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8622 *total = COSTS_N_INSNS (1);
8623 return false;
8625 *total = COSTS_N_INSNS (20);
8626 return false;
8628 *total = COSTS_N_INSNS (1);
8629 if (mode == DImode)
8630 *total += COSTS_N_INSNS (3);
8631 return false;
8633 case SIGN_EXTEND:
8634 case ZERO_EXTEND:
8635 *total = 0;
8636 if (GET_MODE_CLASS (mode) == MODE_INT)
8638 rtx op = XEXP (x, 0);
8639 machine_mode opmode = GET_MODE (op);
8641 if (mode == DImode)
8642 *total += COSTS_N_INSNS (1);
8644 if (opmode != SImode)
8646 if (MEM_P (op))
8648 /* If !arm_arch4, we use one of the extendhisi2_mem
8649 or movhi_bytes patterns for HImode. For a QImode
8650 sign extension, we first zero-extend from memory
8651 and then perform a shift sequence. */
8652 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8653 *total += COSTS_N_INSNS (2);
8655 else if (arm_arch6)
8656 *total += COSTS_N_INSNS (1);
8658 /* We don't have the necessary insn, so we need to perform some
8659 other operation. */
8660 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8661 /* An and with constant 255. */
8662 *total += COSTS_N_INSNS (1);
8663 else
8664 /* A shift sequence. Increase costs slightly to avoid
8665 combining two shifts into an extend operation. */
8666 *total += COSTS_N_INSNS (2) + 1;
8669 return false;
8672 switch (GET_MODE (XEXP (x, 0)))
8674 case V8QImode:
8675 case V4HImode:
8676 case V2SImode:
8677 case V4QImode:
8678 case V2HImode:
8679 *total = COSTS_N_INSNS (1);
8680 return false;
8682 default:
8683 gcc_unreachable ();
8685 gcc_unreachable ();
8687 case ZERO_EXTRACT:
8688 case SIGN_EXTRACT:
8689 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8690 return true;
8692 case CONST_INT:
8693 if (const_ok_for_arm (INTVAL (x))
8694 || const_ok_for_arm (~INTVAL (x)))
8695 *total = COSTS_N_INSNS (1);
8696 else
8697 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8698 INTVAL (x), NULL_RTX,
8699 NULL_RTX, 0, 0));
8700 return true;
8702 case CONST:
8703 case LABEL_REF:
8704 case SYMBOL_REF:
8705 *total = COSTS_N_INSNS (3);
8706 return true;
8708 case HIGH:
8709 *total = COSTS_N_INSNS (1);
8710 return true;
8712 case LO_SUM:
8713 *total = COSTS_N_INSNS (1);
8714 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8715 return true;
8717 case CONST_DOUBLE:
8718 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8719 && (mode == SFmode || !TARGET_VFP_SINGLE))
8720 *total = COSTS_N_INSNS (1);
8721 else
8722 *total = COSTS_N_INSNS (4);
8723 return true;
8725 case SET:
8726 /* The vec_extract patterns accept memory operands that require an
8727 address reload. Account for the cost of that reload to give the
8728 auto-inc-dec pass an incentive to try to replace them. */
8729 if (TARGET_NEON && MEM_P (SET_DEST (x))
8730 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8732 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8733 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8734 *total += COSTS_N_INSNS (1);
8735 return true;
8737 /* Likewise for the vec_set patterns. */
8738 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8739 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8740 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8742 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8743 *total = rtx_cost (mem, code, 0, speed);
8744 if (!neon_vector_mem_operand (mem, 2, true))
8745 *total += COSTS_N_INSNS (1);
8746 return true;
8748 return false;
8750 case UNSPEC:
8751 /* We cost this as high as our memory costs to allow this to
8752 be hoisted from loops. */
8753 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8755 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8757 return true;
8759 case CONST_VECTOR:
8760 if (TARGET_NEON
8761 && TARGET_HARD_FLOAT
8762 && outer == SET
8763 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8764 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8765 *total = COSTS_N_INSNS (1);
8766 else
8767 *total = COSTS_N_INSNS (4);
8768 return true;
8770 default:
8771 *total = COSTS_N_INSNS (4);
8772 return false;
8776 /* Estimates the size cost of thumb1 instructions.
8777 For now most of the code is copied from thumb1_rtx_costs. We need more
8778 fine-grained tuning when we have more related test cases. */
8779 static inline int
8780 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8782 machine_mode mode = GET_MODE (x);
8783 int words;
8785 switch (code)
8787 case ASHIFT:
8788 case ASHIFTRT:
8789 case LSHIFTRT:
8790 case ROTATERT:
8791 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8793 case PLUS:
8794 case MINUS:
8795 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8796 defined by RTL expansion, especially for the expansion of
8797 multiplication. */
8798 if ((GET_CODE (XEXP (x, 0)) == MULT
8799 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8800 || (GET_CODE (XEXP (x, 1)) == MULT
8801 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8802 return COSTS_N_INSNS (2);
8803 /* Deliberately fall through for normal RTX. */
8804 case COMPARE:
8805 case NEG:
8806 case NOT:
8807 return COSTS_N_INSNS (1);
8809 case MULT:
8810 if (CONST_INT_P (XEXP (x, 1)))
8812 /* The Thumb-1 mul instruction can't operate on a constant; we must
8813 load it into a register first. */
8814 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8815 /* For targets that have a very small, high-latency multiply unit,
8816 we prefer to synthesize the multiply with up to 5 instructions,
8817 giving a good balance between size and performance. */
8818 if (arm_arch6m && arm_m_profile_small_mul)
8819 return COSTS_N_INSNS (5);
8820 else
8821 return COSTS_N_INSNS (1) + const_size;
8823 return COSTS_N_INSNS (1);
8825 case SET:
8826 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8827 the mode. */
8828 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8829 return COSTS_N_INSNS (words)
8830 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8831 || satisfies_constraint_K (SET_SRC (x))
8832 /* thumb1_movdi_insn. */
8833 || ((words > 1) && MEM_P (SET_SRC (x))));
8835 case CONST_INT:
8836 if (outer == SET)
8838 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8839 return COSTS_N_INSNS (1);
8840 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8841 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8842 return COSTS_N_INSNS (2);
8843 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8844 if (thumb_shiftable_const (INTVAL (x)))
8845 return COSTS_N_INSNS (2);
8846 return COSTS_N_INSNS (3);
8848 else if ((outer == PLUS || outer == COMPARE)
8849 && INTVAL (x) < 256 && INTVAL (x) > -256)
8850 return 0;
8851 else if ((outer == IOR || outer == XOR || outer == AND)
8852 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8853 return COSTS_N_INSNS (1);
8854 else if (outer == AND)
8856 int i;
8857 /* This duplicates the tests in the andsi3 expander. */
8858 for (i = 9; i <= 31; i++)
8859 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8860 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8861 return COSTS_N_INSNS (2);
8863 else if (outer == ASHIFT || outer == ASHIFTRT
8864 || outer == LSHIFTRT)
8865 return 0;
8866 return COSTS_N_INSNS (2);
8868 case CONST:
8869 case CONST_DOUBLE:
8870 case LABEL_REF:
8871 case SYMBOL_REF:
8872 return COSTS_N_INSNS (3);
8874 case UDIV:
8875 case UMOD:
8876 case DIV:
8877 case MOD:
8878 return 100;
8880 case TRUNCATE:
8881 return 99;
8883 case AND:
8884 case XOR:
8885 case IOR:
8886 return COSTS_N_INSNS (1);
8888 case MEM:
8889 return (COSTS_N_INSNS (1)
8890 + COSTS_N_INSNS (1)
8891 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8892 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8893 ? COSTS_N_INSNS (1) : 0));
8895 case IF_THEN_ELSE:
8896 /* XXX a guess. */
8897 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8898 return 14;
8899 return 2;
8901 case ZERO_EXTEND:
8902 /* XXX still guessing. */
8903 switch (GET_MODE (XEXP (x, 0)))
8905 case QImode:
8906 return (1 + (mode == DImode ? 4 : 0)
8907 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8909 case HImode:
8910 return (4 + (mode == DImode ? 4 : 0)
8911 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8913 case SImode:
8914 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8916 default:
8917 return 99;
8920 default:
8921 return 99;
8925 /* RTX costs when optimizing for size. */
8926 static bool
8927 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8928 int *total)
8930 machine_mode mode = GET_MODE (x);
8931 if (TARGET_THUMB1)
8933 *total = thumb1_size_rtx_costs (x, code, outer_code);
8934 return true;
8937 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8938 switch (code)
8940 case MEM:
8941 /* A memory access costs 1 insn if the mode is small, or the address is
8942 a single register, otherwise it costs one insn per word. */
8943 if (REG_P (XEXP (x, 0)))
8944 *total = COSTS_N_INSNS (1);
8945 else if (flag_pic
8946 && GET_CODE (XEXP (x, 0)) == PLUS
8947 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8948 /* This will be split into two instructions.
8949 See arm.md:calculate_pic_address. */
8950 *total = COSTS_N_INSNS (2);
8951 else
8952 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8953 return true;
8955 case DIV:
8956 case MOD:
8957 case UDIV:
8958 case UMOD:
8959 /* Needs a libcall, so it costs about this. */
8960 *total = COSTS_N_INSNS (2);
8961 return false;
8963 case ROTATE:
8964 if (mode == SImode && REG_P (XEXP (x, 1)))
8966 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8967 return true;
8969 /* Fall through */
8970 case ROTATERT:
8971 case ASHIFT:
8972 case LSHIFTRT:
8973 case ASHIFTRT:
8974 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8976 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8977 return true;
8979 else if (mode == SImode)
8981 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8982 /* Slightly disparage register shifts, but not by much. */
8983 if (!CONST_INT_P (XEXP (x, 1)))
8984 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8985 return true;
8988 /* Needs a libcall. */
8989 *total = COSTS_N_INSNS (2);
8990 return false;
8992 case MINUS:
8993 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8994 && (mode == SFmode || !TARGET_VFP_SINGLE))
8996 *total = COSTS_N_INSNS (1);
8997 return false;
9000 if (mode == SImode)
9002 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9003 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9005 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9006 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9007 || subcode1 == ROTATE || subcode1 == ROTATERT
9008 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9009 || subcode1 == ASHIFTRT)
9011 /* It's just the cost of the two operands. */
9012 *total = 0;
9013 return false;
9016 *total = COSTS_N_INSNS (1);
9017 return false;
9020 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9021 return false;
9023 case PLUS:
9024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9025 && (mode == SFmode || !TARGET_VFP_SINGLE))
9027 *total = COSTS_N_INSNS (1);
9028 return false;
9031 /* A shift as a part of ADD costs nothing. */
9032 if (GET_CODE (XEXP (x, 0)) == MULT
9033 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9035 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9036 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9037 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9038 return true;
9041 /* Fall through */
9042 case AND: case XOR: case IOR:
9043 if (mode == SImode)
9045 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9047 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9048 || subcode == LSHIFTRT || subcode == ASHIFTRT
9049 || (code == AND && subcode == NOT))
9051 /* It's just the cost of the two operands. */
9052 *total = 0;
9053 return false;
9057 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9058 return false;
9060 case MULT:
9061 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9062 return false;
9064 case NEG:
9065 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9066 && (mode == SFmode || !TARGET_VFP_SINGLE))
9068 *total = COSTS_N_INSNS (1);
9069 return false;
9072 /* Fall through */
9073 case NOT:
9074 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9076 return false;
9078 case IF_THEN_ELSE:
9079 *total = 0;
9080 return false;
9082 case COMPARE:
9083 if (cc_register (XEXP (x, 0), VOIDmode))
9084 *total = 0;
9085 else
9086 *total = COSTS_N_INSNS (1);
9087 return false;
9089 case ABS:
9090 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9091 && (mode == SFmode || !TARGET_VFP_SINGLE))
9092 *total = COSTS_N_INSNS (1);
9093 else
9094 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9095 return false;
9097 case SIGN_EXTEND:
9098 case ZERO_EXTEND:
9099 return arm_rtx_costs_1 (x, outer_code, total, 0);
9101 case CONST_INT:
9102 if (const_ok_for_arm (INTVAL (x)))
9103 /* A multiplication by a constant requires another instruction
9104 to load the constant to a register. */
9105 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9106 ? 1 : 0);
9107 else if (const_ok_for_arm (~INTVAL (x)))
9108 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9109 else if (const_ok_for_arm (-INTVAL (x)))
9111 if (outer_code == COMPARE || outer_code == PLUS
9112 || outer_code == MINUS)
9113 *total = 0;
9114 else
9115 *total = COSTS_N_INSNS (1);
9117 else
9118 *total = COSTS_N_INSNS (2);
9119 return true;
9121 case CONST:
9122 case LABEL_REF:
9123 case SYMBOL_REF:
9124 *total = COSTS_N_INSNS (2);
9125 return true;
9127 case CONST_DOUBLE:
9128 *total = COSTS_N_INSNS (4);
9129 return true;
9131 case CONST_VECTOR:
9132 if (TARGET_NEON
9133 && TARGET_HARD_FLOAT
9134 && outer_code == SET
9135 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9136 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9137 *total = COSTS_N_INSNS (1);
9138 else
9139 *total = COSTS_N_INSNS (4);
9140 return true;
9142 case HIGH:
9143 case LO_SUM:
9144 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9145 cost of these slightly. */
9146 *total = COSTS_N_INSNS (1) + 1;
9147 return true;
9149 case SET:
9150 return false;
9152 default:
9153 if (mode != VOIDmode)
9154 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9155 else
9156 *total = COSTS_N_INSNS (4); /* Who knows? */
9157 return false;
9161 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9162 operand, then return the operand that is being shifted. If the shift
9163 is not by a constant, then set *SHIFT_REG to the shift-amount register.
9164 Return NULL if OP is not a shifter operand. */
9165 static rtx
9166 shifter_op_p (rtx op, rtx *shift_reg)
9168 enum rtx_code code = GET_CODE (op);
9170 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9171 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9172 return XEXP (op, 0);
9173 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9174 return XEXP (op, 0);
9175 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9176 || code == ASHIFTRT)
9178 if (!CONST_INT_P (XEXP (op, 1)))
9179 *shift_reg = XEXP (op, 1);
9180 return XEXP (op, 0);
9183 return NULL;
9186 static bool
9187 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9189 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9190 gcc_assert (GET_CODE (x) == UNSPEC);
9192 switch (XINT (x, 1))
9194 case UNSPEC_UNALIGNED_LOAD:
9195 /* We can only do unaligned loads into the integer unit, and we can't
9196 use LDM or LDRD. */
9197 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9198 if (speed_p)
9199 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9200 + extra_cost->ldst.load_unaligned);
9202 #ifdef NOT_YET
9203 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9204 ADDR_SPACE_GENERIC, speed_p);
9205 #endif
9206 return true;
9208 case UNSPEC_UNALIGNED_STORE:
9209 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9210 if (speed_p)
9211 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9212 + extra_cost->ldst.store_unaligned);
9214 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9215 #ifdef NOT_YET
9216 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9217 ADDR_SPACE_GENERIC, speed_p);
9218 #endif
9219 return true;
9221 case UNSPEC_VRINTZ:
9222 case UNSPEC_VRINTP:
9223 case UNSPEC_VRINTM:
9224 case UNSPEC_VRINTR:
9225 case UNSPEC_VRINTX:
9226 case UNSPEC_VRINTA:
9227 *cost = COSTS_N_INSNS (1);
9228 if (speed_p)
9229 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9231 return true;
9232 default:
9233 *cost = COSTS_N_INSNS (2);
9234 break;
9236 return false;
9239 /* Cost of a libcall. We assume one insn per argument, an amount for the
9240 call (one insn for -Os) and then one for processing the result. */
9241 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
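A worked expansion of the macro above (editorial, not from the source); note that it picks up speed_p from the function it is used in.

/* LIBCALL_COST (2) is COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20) when
   optimizing for speed, and COSTS_N_INSNS (2 + 2) = COSTS_N_INSNS (4)
   at -Os: two argument moves, the call, and the result move.  */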
9243 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9244 do \
9246 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9247 if (shift_op != NULL \
9248 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9250 if (shift_reg) \
9252 if (speed_p) \
9253 *cost += extra_cost->alu.arith_shift_reg; \
9254 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9256 else if (speed_p) \
9257 *cost += extra_cost->alu.arith_shift; \
9259 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9260 + rtx_cost (XEXP (x, 1 - IDX), \
9261 OP, 1, speed_p)); \
9262 return true; \
9265 while (0);
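An editorial note on the macro above (an inference from its body, not a statement from the source): it is invoked from the narrow-mode PLUS/MINUS cost handling; when operand IDX of X is a left shift (an ASHIFT, or a MULT by a power of two, as recognized by shifter_op_p and arm_rtx_shift_left_p), it adds the arithmetic-with-shift cost plus the costs of the shifted operand and of the other operand, then returns from the enclosing case.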
9267 /* RTX costs. Make an estimate of the cost of executing the operation
9268 X, which is contained within an operation with code OUTER_CODE.
9269 SPEED_P indicates whether the cost desired is the performance cost,
9270 or the size cost. The estimate is stored in COST and the return
9271 value is TRUE if the cost calculation is final, or FALSE if the
9272 caller should recurse through the operands of X to add additional
9273 costs.
9275 We currently make no attempt to model the size savings of Thumb-2
9276 16-bit instructions. At the normal points in compilation where
9277 this code is called we have no measure of whether the condition
9278 flags are live or not, and thus no realistic way to determine what
9279 the size will eventually be. */
9280 static bool
9281 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9282 const struct cpu_cost_table *extra_cost,
9283 int *cost, bool speed_p)
9285 machine_mode mode = GET_MODE (x);
9287 if (TARGET_THUMB1)
9289 if (speed_p)
9290 *cost = thumb1_rtx_costs (x, code, outer_code);
9291 else
9292 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9293 return true;
9296 switch (code)
9298 case SET:
9299 *cost = 0;
9300 /* SET RTXs don't have a mode so we get it from the destination. */
9301 mode = GET_MODE (SET_DEST (x));
9303 if (REG_P (SET_SRC (x))
9304 && REG_P (SET_DEST (x)))
9306 /* Assume that most copies can be done with a single insn,
9307 unless we don't have HW FP, in which case everything
9308 larger than word mode will require two insns. */
9309 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9310 && GET_MODE_SIZE (mode) > 4)
9311 || mode == DImode)
9312 ? 2 : 1);
9313 /* Conditional register moves can be encoded
9314 in 16 bits in Thumb mode. */
9315 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9316 *cost >>= 1;
9318 return true;
9321 if (CONST_INT_P (SET_SRC (x)))
9323 /* Handle CONST_INT here, since the value doesn't have a mode
9324 and we would otherwise be unable to work out the true cost. */
9325 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9326 outer_code = SET;
9327 /* Slightly lower the cost of setting a core reg to a constant.
9328 This helps break up chains and allows for better scheduling. */
9329 if (REG_P (SET_DEST (x))
9330 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9331 *cost -= 1;
9332 x = SET_SRC (x);
9333 /* Immediate moves with an immediate in the range [0, 255] can be
9334 encoded in 16 bits in Thumb mode. */
9335 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9336 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9337 *cost >>= 1;
9338 goto const_int_cost;
9341 return false;
9343 case MEM:
9344 /* A memory access costs 1 insn if the mode is small, or the address is
9345 a single register, otherwise it costs one insn per word. */
9346 if (REG_P (XEXP (x, 0)))
9347 *cost = COSTS_N_INSNS (1);
9348 else if (flag_pic
9349 && GET_CODE (XEXP (x, 0)) == PLUS
9350 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9351 /* This will be split into two instructions.
9352 See arm.md:calculate_pic_address. */
9353 *cost = COSTS_N_INSNS (2);
9354 else
9355 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9357 /* For speed optimizations, add the costs of the address and
9358 accessing memory. */
9359 if (speed_p)
9360 #ifdef NOT_YET
9361 *cost += (extra_cost->ldst.load
9362 + arm_address_cost (XEXP (x, 0), mode,
9363 ADDR_SPACE_GENERIC, speed_p));
9364 #else
9365 *cost += extra_cost->ldst.load;
9366 #endif
9367 return true;
9369 case PARALLEL:
9371 /* Calculations of LDM costs are complex. We assume an initial cost
9372 (ldm_1st) which will load the number of registers mentioned in
9373 ldm_regs_per_insn_1st registers; then each additional
9374 ldm_regs_per_insn_subsequent registers cost one more insn. The
9375 formula for N regs is thus:
9377 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9378 + ldm_regs_per_insn_subsequent - 1)
9379 / ldm_regs_per_insn_subsequent).
9381 Additional costs may also be added for addressing. A similar
9382 formula is used for STM. */
9384 bool is_ldm = load_multiple_operation (x, SImode);
9385 bool is_stm = store_multiple_operation (x, SImode);
9387 *cost = COSTS_N_INSNS (1);
9389 if (is_ldm || is_stm)
9391 if (speed_p)
9393 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9394 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9395 ? extra_cost->ldst.ldm_regs_per_insn_1st
9396 : extra_cost->ldst.stm_regs_per_insn_1st;
9397 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9398 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9399 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9401 *cost += regs_per_insn_1st
9402 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9403 + regs_per_insn_sub - 1)
9404 / regs_per_insn_sub);
9405 return true;
9409 return false;
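The ceiling-division part of the LDM/STM formula above can be sketched standalone (editorial, not part of arm.c; the function name and the tuning values in the example are hypothetical).

static int
example_extra_ldm_insns (int nregs, int regs_per_insn_1st,
                         int regs_per_insn_sub)
{
  int remaining = nregs > regs_per_insn_1st ? nregs - regs_per_insn_1st : 0;

  /* Ceiling division: each additional insn transfers regs_per_insn_sub
     more registers.  E.g. 6 registers with 2 in the first insn and 2 per
     subsequent insn needs (4 + 1) / 2 = 2 extra insns.  */
  return (remaining + regs_per_insn_sub - 1) / regs_per_insn_sub;
}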
9411 case DIV:
9412 case UDIV:
9413 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9414 && (mode == SFmode || !TARGET_VFP_SINGLE))
9415 *cost = COSTS_N_INSNS (speed_p
9416 ? extra_cost->fp[mode != SFmode].div : 1);
9417 else if (mode == SImode && TARGET_IDIV)
9418 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9419 else
9420 *cost = LIBCALL_COST (2);
9421 return false; /* All arguments must be in registers. */
9423 case MOD:
9424 case UMOD:
9425 *cost = LIBCALL_COST (2);
9426 return false; /* All arguments must be in registers. */
9428 case ROTATE:
9429 if (mode == SImode && REG_P (XEXP (x, 1)))
9431 *cost = (COSTS_N_INSNS (2)
9432 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9433 if (speed_p)
9434 *cost += extra_cost->alu.shift_reg;
9435 return true;
9437 /* Fall through */
9438 case ROTATERT:
9439 case ASHIFT:
9440 case LSHIFTRT:
9441 case ASHIFTRT:
9442 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9444 *cost = (COSTS_N_INSNS (3)
9445 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9446 if (speed_p)
9447 *cost += 2 * extra_cost->alu.shift;
9448 return true;
9450 else if (mode == SImode)
9452 *cost = (COSTS_N_INSNS (1)
9453 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9454 /* Slightly disparage register shifts at -Os, but not by much. */
9455 if (!CONST_INT_P (XEXP (x, 1)))
9456 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9457 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9458 return true;
9460 else if (GET_MODE_CLASS (mode) == MODE_INT
9461 && GET_MODE_SIZE (mode) < 4)
9463 if (code == ASHIFT)
9465 *cost = (COSTS_N_INSNS (1)
9466 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9467 /* Slightly disparage register shifts at -Os, but not by
9468 much. */
9469 if (!CONST_INT_P (XEXP (x, 1)))
9470 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9471 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9473 else if (code == LSHIFTRT || code == ASHIFTRT)
9475 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9477 /* Can use SBFX/UBFX. */
9478 *cost = COSTS_N_INSNS (1);
9479 if (speed_p)
9480 *cost += extra_cost->alu.bfx;
9481 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9483 else
9485 *cost = COSTS_N_INSNS (2);
9486 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9487 if (speed_p)
9489 if (CONST_INT_P (XEXP (x, 1)))
9490 *cost += 2 * extra_cost->alu.shift;
9491 else
9492 *cost += (extra_cost->alu.shift
9493 + extra_cost->alu.shift_reg);
9495 else
9496 /* Slightly disparage register shifts. */
9497 *cost += !CONST_INT_P (XEXP (x, 1));
9500 else /* Rotates. */
9502 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9503 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9504 if (speed_p)
9506 if (CONST_INT_P (XEXP (x, 1)))
9507 *cost += (2 * extra_cost->alu.shift
9508 + extra_cost->alu.log_shift);
9509 else
9510 *cost += (extra_cost->alu.shift
9511 + extra_cost->alu.shift_reg
9512 + extra_cost->alu.log_shift_reg);
9515 return true;
9518 *cost = LIBCALL_COST (2);
9519 return false;
9521 case BSWAP:
9522 if (arm_arch6)
9524 if (mode == SImode)
9526 *cost = COSTS_N_INSNS (1);
9527 if (speed_p)
9528 *cost += extra_cost->alu.rev;
9530 return false;
9533 else
9535 /* No rev instruction available. Look at arm_legacy_rev
9536 and thumb_legacy_rev for the form of RTL used then. */
9537 if (TARGET_THUMB)
9539 *cost = COSTS_N_INSNS (10);
9541 if (speed_p)
9543 *cost += 6 * extra_cost->alu.shift;
9544 *cost += 3 * extra_cost->alu.logical;
9547 else
9549 *cost = COSTS_N_INSNS (5);
9551 if (speed_p)
9553 *cost += 2 * extra_cost->alu.shift;
9554 *cost += extra_cost->alu.arith_shift;
9555 *cost += 2 * extra_cost->alu.logical;
9558 return true;
9560 return false;
9562 case MINUS:
9563 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9564 && (mode == SFmode || !TARGET_VFP_SINGLE))
9566 *cost = COSTS_N_INSNS (1);
9567 if (GET_CODE (XEXP (x, 0)) == MULT
9568 || GET_CODE (XEXP (x, 1)) == MULT)
9570 rtx mul_op0, mul_op1, sub_op;
9572 if (speed_p)
9573 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9575 if (GET_CODE (XEXP (x, 0)) == MULT)
9577 mul_op0 = XEXP (XEXP (x, 0), 0);
9578 mul_op1 = XEXP (XEXP (x, 0), 1);
9579 sub_op = XEXP (x, 1);
9581 else
9583 mul_op0 = XEXP (XEXP (x, 1), 0);
9584 mul_op1 = XEXP (XEXP (x, 1), 1);
9585 sub_op = XEXP (x, 0);
9588 /* The first operand of the multiply may be optionally
9589 negated. */
9590 if (GET_CODE (mul_op0) == NEG)
9591 mul_op0 = XEXP (mul_op0, 0);
9593 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9594 + rtx_cost (mul_op1, code, 0, speed_p)
9595 + rtx_cost (sub_op, code, 0, speed_p));
9597 return true;
9600 if (speed_p)
9601 *cost += extra_cost->fp[mode != SFmode].addsub;
9602 return false;
9605 if (mode == SImode)
9607 rtx shift_by_reg = NULL;
9608 rtx shift_op;
9609 rtx non_shift_op;
9611 *cost = COSTS_N_INSNS (1);
9613 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9614 if (shift_op == NULL)
9616 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9617 non_shift_op = XEXP (x, 0);
9619 else
9620 non_shift_op = XEXP (x, 1);
9622 if (shift_op != NULL)
9624 if (shift_by_reg != NULL)
9626 if (speed_p)
9627 *cost += extra_cost->alu.arith_shift_reg;
9628 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9630 else if (speed_p)
9631 *cost += extra_cost->alu.arith_shift;
9633 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9634 + rtx_cost (non_shift_op, code, 0, speed_p));
9635 return true;
9638 if (arm_arch_thumb2
9639 && GET_CODE (XEXP (x, 1)) == MULT)
9641 /* MLS. */
9642 if (speed_p)
9643 *cost += extra_cost->mult[0].add;
9644 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9645 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9646 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9647 return true;
9650 if (CONST_INT_P (XEXP (x, 0)))
9652 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9653 INTVAL (XEXP (x, 0)), NULL_RTX,
9654 NULL_RTX, 1, 0);
9655 *cost = COSTS_N_INSNS (insns);
9656 if (speed_p)
9657 *cost += insns * extra_cost->alu.arith;
9658 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9659 return true;
9661 else if (speed_p)
9662 *cost += extra_cost->alu.arith;
9664 return false;
9667 if (GET_MODE_CLASS (mode) == MODE_INT
9668 && GET_MODE_SIZE (mode) < 4)
9670 rtx shift_op, shift_reg;
9671 shift_reg = NULL;
9673 /* We check both sides of the MINUS for shifter operands since,
9674 unlike PLUS, it's not commutative. */
9676 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9677 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9679 /* Slightly disparage, as we might need to widen the result. */
9680 *cost = 1 + COSTS_N_INSNS (1);
9681 if (speed_p)
9682 *cost += extra_cost->alu.arith;
9684 if (CONST_INT_P (XEXP (x, 0)))
9686 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9687 return true;
9690 return false;
9693 if (mode == DImode)
9695 *cost = COSTS_N_INSNS (2);
9697 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9699 rtx op1 = XEXP (x, 1);
9701 if (speed_p)
9702 *cost += 2 * extra_cost->alu.arith;
9704 if (GET_CODE (op1) == ZERO_EXTEND)
9705 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9706 else
9707 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9708 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9709 0, speed_p);
9710 return true;
9712 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9714 if (speed_p)
9715 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9716 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9717 0, speed_p)
9718 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9719 return true;
9721 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9722 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9724 if (speed_p)
9725 *cost += (extra_cost->alu.arith
9726 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9727 ? extra_cost->alu.arith
9728 : extra_cost->alu.arith_shift));
9729 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9730 + rtx_cost (XEXP (XEXP (x, 1), 0),
9731 GET_CODE (XEXP (x, 1)), 0, speed_p));
9732 return true;
9735 if (speed_p)
9736 *cost += 2 * extra_cost->alu.arith;
9737 return false;
9740 /* Vector mode? */
9742 *cost = LIBCALL_COST (2);
9743 return false;
9745 case PLUS:
9746 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9747 && (mode == SFmode || !TARGET_VFP_SINGLE))
9749 *cost = COSTS_N_INSNS (1);
9750 if (GET_CODE (XEXP (x, 0)) == MULT)
9752 rtx mul_op0, mul_op1, add_op;
9754 if (speed_p)
9755 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9757 mul_op0 = XEXP (XEXP (x, 0), 0);
9758 mul_op1 = XEXP (XEXP (x, 0), 1);
9759 add_op = XEXP (x, 1);
9761 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9762 + rtx_cost (mul_op1, code, 0, speed_p)
9763 + rtx_cost (add_op, code, 0, speed_p));
9765 return true;
9768 if (speed_p)
9769 *cost += extra_cost->fp[mode != SFmode].addsub;
9770 return false;
9772 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9774 *cost = LIBCALL_COST (2);
9775 return false;
9778 /* Narrow modes can be synthesized in SImode, but the range
9779 of useful sub-operations is limited. Check for shift operations
9780 on one of the operands. Only left shifts can be used in the
9781 narrow modes. */
9782 if (GET_MODE_CLASS (mode) == MODE_INT
9783 && GET_MODE_SIZE (mode) < 4)
9785 rtx shift_op, shift_reg;
9786 shift_reg = NULL;
9788 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9790 if (CONST_INT_P (XEXP (x, 1)))
9792 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9793 INTVAL (XEXP (x, 1)), NULL_RTX,
9794 NULL_RTX, 1, 0);
9795 *cost = COSTS_N_INSNS (insns);
9796 if (speed_p)
9797 *cost += insns * extra_cost->alu.arith;
9798 /* Slightly penalize a narrow operation as the result may
9799 need widening. */
9800 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9801 return true;
9804 /* Slightly penalize a narrow operation as the result may
9805 need widening. */
9806 *cost = 1 + COSTS_N_INSNS (1);
9807 if (speed_p)
9808 *cost += extra_cost->alu.arith;
9810 return false;
9813 if (mode == SImode)
9815 rtx shift_op, shift_reg;
9817 *cost = COSTS_N_INSNS (1);
9818 if (TARGET_INT_SIMD
9819 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9820 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9822 /* UXTA[BH] or SXTA[BH]. */
9823 if (speed_p)
9824 *cost += extra_cost->alu.extend_arith;
9825 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9826 speed_p)
9827 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9828 return true;
9831 shift_reg = NULL;
9832 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9833 if (shift_op != NULL)
9835 if (shift_reg)
9837 if (speed_p)
9838 *cost += extra_cost->alu.arith_shift_reg;
9839 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9841 else if (speed_p)
9842 *cost += extra_cost->alu.arith_shift;
9844 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9845 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9846 return true;
9848 if (GET_CODE (XEXP (x, 0)) == MULT)
9850 rtx mul_op = XEXP (x, 0);
9852 *cost = COSTS_N_INSNS (1);
9854 if (TARGET_DSP_MULTIPLY
9855 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9856 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9857 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9858 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9859 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9860 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9861 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9862 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9863 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9864 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9865 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9866 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9867 == 16))))))
9869 /* SMLA[BT][BT]. */
9870 if (speed_p)
9871 *cost += extra_cost->mult[0].extend_add;
9872 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9873 SIGN_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9875 SIGN_EXTEND, 0, speed_p)
9876 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9877 return true;
9880 if (speed_p)
9881 *cost += extra_cost->mult[0].add;
9882 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9883 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9884 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9885 return true;
9887 if (CONST_INT_P (XEXP (x, 1)))
9889 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9890 INTVAL (XEXP (x, 1)), NULL_RTX,
9891 NULL_RTX, 1, 0);
9892 *cost = COSTS_N_INSNS (insns);
9893 if (speed_p)
9894 *cost += insns * extra_cost->alu.arith;
9895 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9896 return true;
9898 else if (speed_p)
9899 *cost += extra_cost->alu.arith;
9901 return false;
9904 if (mode == DImode)
9906 if (arm_arch3m
9907 && GET_CODE (XEXP (x, 0)) == MULT
9908 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9909 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9910 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9911 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9913 *cost = COSTS_N_INSNS (1);
9914 if (speed_p)
9915 *cost += extra_cost->mult[1].extend_add;
9916 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9917 ZERO_EXTEND, 0, speed_p)
9918 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9919 ZERO_EXTEND, 0, speed_p)
9920 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9921 return true;
9924 *cost = COSTS_N_INSNS (2);
9926 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9927 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9929 if (speed_p)
9930 *cost += (extra_cost->alu.arith
9931 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9932 ? extra_cost->alu.arith
9933 : extra_cost->alu.arith_shift));
9935 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9936 speed_p)
9937 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9938 return true;
9941 if (speed_p)
9942 *cost += 2 * extra_cost->alu.arith;
9943 return false;
9946 /* Vector mode? */
9947 *cost = LIBCALL_COST (2);
9948 return false;
9949 case IOR:
9950 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9952 *cost = COSTS_N_INSNS (1);
9953 if (speed_p)
9954 *cost += extra_cost->alu.rev;
9956 return true;
9958 /* Fall through. */
9959 case AND: case XOR:
9960 if (mode == SImode)
9962 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9963 rtx op0 = XEXP (x, 0);
9964 rtx shift_op, shift_reg;
9966 *cost = COSTS_N_INSNS (1);
9968 if (subcode == NOT
9969 && (code == AND
9970 || (code == IOR && TARGET_THUMB2)))
9971 op0 = XEXP (op0, 0);
9973 shift_reg = NULL;
9974 shift_op = shifter_op_p (op0, &shift_reg);
9975 if (shift_op != NULL)
9977 if (shift_reg)
9979 if (speed_p)
9980 *cost += extra_cost->alu.log_shift_reg;
9981 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9983 else if (speed_p)
9984 *cost += extra_cost->alu.log_shift;
9986 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9987 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9988 return true;
9991 if (CONST_INT_P (XEXP (x, 1)))
9993 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9994 INTVAL (XEXP (x, 1)), NULL_RTX,
9995 NULL_RTX, 1, 0);
9997 *cost = COSTS_N_INSNS (insns);
9998 if (speed_p)
9999 *cost += insns * extra_cost->alu.logical;
10000 *cost += rtx_cost (op0, code, 0, speed_p);
10001 return true;
10004 if (speed_p)
10005 *cost += extra_cost->alu.logical;
10006 *cost += (rtx_cost (op0, code, 0, speed_p)
10007 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10008 return true;
10011 if (mode == DImode)
10013 rtx op0 = XEXP (x, 0);
10014 enum rtx_code subcode = GET_CODE (op0);
10016 *cost = COSTS_N_INSNS (2);
10018 if (subcode == NOT
10019 && (code == AND
10020 || (code == IOR && TARGET_THUMB2)))
10021 op0 = XEXP (op0, 0);
10023 if (GET_CODE (op0) == ZERO_EXTEND)
10025 if (speed_p)
10026 *cost += 2 * extra_cost->alu.logical;
10028 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10029 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10030 return true;
10032 else if (GET_CODE (op0) == SIGN_EXTEND)
10034 if (speed_p)
10035 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10037 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10038 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10039 return true;
10042 if (speed_p)
10043 *cost += 2 * extra_cost->alu.logical;
10045 return true;
10047 /* Vector mode? */
10049 *cost = LIBCALL_COST (2);
10050 return false;
10052 case MULT:
10053 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10054 && (mode == SFmode || !TARGET_VFP_SINGLE))
10056 rtx op0 = XEXP (x, 0);
10058 *cost = COSTS_N_INSNS (1);
10060 if (GET_CODE (op0) == NEG)
10061 op0 = XEXP (op0, 0);
10063 if (speed_p)
10064 *cost += extra_cost->fp[mode != SFmode].mult;
10066 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10067 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10068 return true;
10070 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10072 *cost = LIBCALL_COST (2);
10073 return false;
10076 if (mode == SImode)
10078 *cost = COSTS_N_INSNS (1);
10079 if (TARGET_DSP_MULTIPLY
10080 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10081 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10082 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10083 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10084 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10085 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10086 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10087 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10088 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10089 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10090 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10091 && (INTVAL (XEXP (XEXP (x, 1), 1))
10092 == 16))))))
10094 /* SMUL[TB][TB]. */
10095 if (speed_p)
10096 *cost += extra_cost->mult[0].extend;
10097 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10098 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10099 return true;
10101 if (speed_p)
10102 *cost += extra_cost->mult[0].simple;
10103 return false;
10106 if (mode == DImode)
10108 if (arm_arch3m
10109 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10110 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10111 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10112 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10114 *cost = COSTS_N_INSNS (1);
10115 if (speed_p)
10116 *cost += extra_cost->mult[1].extend;
10117 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10118 ZERO_EXTEND, 0, speed_p)
10119 + rtx_cost (XEXP (XEXP (x, 1), 0),
10120 ZERO_EXTEND, 0, speed_p));
10121 return true;
10124 *cost = LIBCALL_COST (2);
10125 return false;
10128 /* Vector mode? */
10129 *cost = LIBCALL_COST (2);
10130 return false;
10132 case NEG:
10133 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10134 && (mode == SFmode || !TARGET_VFP_SINGLE))
10136 *cost = COSTS_N_INSNS (1);
10137 if (speed_p)
10138 *cost += extra_cost->fp[mode != SFmode].neg;
10140 return false;
10142 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10144 *cost = LIBCALL_COST (1);
10145 return false;
10148 if (mode == SImode)
10150 if (GET_CODE (XEXP (x, 0)) == ABS)
10152 *cost = COSTS_N_INSNS (2);
10153 /* Assume the non-flag-changing variant. */
10154 if (speed_p)
10155 *cost += (extra_cost->alu.log_shift
10156 + extra_cost->alu.arith_shift);
10157 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10158 return true;
10161 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10162 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10164 *cost = COSTS_N_INSNS (2);
10165 /* No extra cost for MOV imm and MVN imm. */
10166 /* If the comparison op is using the flags, there's no further
10167 cost, otherwise we need to add the cost of the comparison. */
10168 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10169 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10170 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10172 *cost += (COSTS_N_INSNS (1)
10173 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10174 speed_p)
10175 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10176 speed_p));
10177 if (speed_p)
10178 *cost += extra_cost->alu.arith;
10180 return true;
10182 *cost = COSTS_N_INSNS (1);
10183 if (speed_p)
10184 *cost += extra_cost->alu.arith;
10185 return false;
10188 if (GET_MODE_CLASS (mode) == MODE_INT
10189 && GET_MODE_SIZE (mode) < 4)
10191 /* Slightly disparage, as we might need an extend operation. */
10192 *cost = 1 + COSTS_N_INSNS (1);
10193 if (speed_p)
10194 *cost += extra_cost->alu.arith;
10195 return false;
10198 if (mode == DImode)
10200 *cost = COSTS_N_INSNS (2);
10201 if (speed_p)
10202 *cost += 2 * extra_cost->alu.arith;
10203 return false;
10206 /* Vector mode? */
10207 *cost = LIBCALL_COST (1);
10208 return false;
10210 case NOT:
10211 if (mode == SImode)
10213 rtx shift_op;
10214 rtx shift_reg = NULL;
10216 *cost = COSTS_N_INSNS (1);
10217 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10219 if (shift_op)
10221 if (shift_reg != NULL)
10223 if (speed_p)
10224 *cost += extra_cost->alu.log_shift_reg;
10225 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10227 else if (speed_p)
10228 *cost += extra_cost->alu.log_shift;
10229 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10230 return true;
10233 if (speed_p)
10234 *cost += extra_cost->alu.logical;
10235 return false;
10237 if (mode == DImode)
10239 *cost = COSTS_N_INSNS (2);
10240 return false;
10243 /* Vector mode? */
10245 *cost += LIBCALL_COST (1);
10246 return false;
10248 case IF_THEN_ELSE:
10250 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10252 *cost = COSTS_N_INSNS (4);
10253 return true;
10255 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10256 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10258 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10259 /* Assume that if one arm of the if_then_else is a register,
10260 that it will be tied with the result and eliminate the
10261 conditional insn. */
10262 if (REG_P (XEXP (x, 1)))
10263 *cost += op2cost;
10264 else if (REG_P (XEXP (x, 2)))
10265 *cost += op1cost;
10266 else
10268 if (speed_p)
10270 if (extra_cost->alu.non_exec_costs_exec)
10271 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10272 else
10273 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10275 else
10276 *cost += op1cost + op2cost;
10279 return true;
10281 case COMPARE:
10282 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10283 *cost = 0;
10284 else
10286 machine_mode op0mode;
10287 /* We'll mostly assume that the cost of a compare is the cost of the
10288 LHS. However, there are some notable exceptions. */
10290 /* Floating point compares are never done as side-effects. */
10291 op0mode = GET_MODE (XEXP (x, 0));
10292 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10293 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10295 *cost = COSTS_N_INSNS (1);
10296 if (speed_p)
10297 *cost += extra_cost->fp[op0mode != SFmode].compare;
10299 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10301 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10302 return true;
10305 return false;
10307 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10309 *cost = LIBCALL_COST (2);
10310 return false;
10313 /* DImode compares normally take two insns. */
10314 if (op0mode == DImode)
10316 *cost = COSTS_N_INSNS (2);
10317 if (speed_p)
10318 *cost += 2 * extra_cost->alu.arith;
10319 return false;
10322 if (op0mode == SImode)
10324 rtx shift_op;
10325 rtx shift_reg;
10327 if (XEXP (x, 1) == const0_rtx
10328 && !(REG_P (XEXP (x, 0))
10329 || (GET_CODE (XEXP (x, 0)) == SUBREG
10330 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10332 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10334 /* Multiply operations that set the flags are often
10335 significantly more expensive. */
10336 if (speed_p
10337 && GET_CODE (XEXP (x, 0)) == MULT
10338 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10339 *cost += extra_cost->mult[0].flag_setting;
10341 if (speed_p
10342 && GET_CODE (XEXP (x, 0)) == PLUS
10343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10344 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10345 0), 1), mode))
10346 *cost += extra_cost->mult[0].flag_setting;
10347 return true;
10350 shift_reg = NULL;
10351 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10352 if (shift_op != NULL)
10354 *cost = COSTS_N_INSNS (1);
10355 if (shift_reg != NULL)
10357 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10358 if (speed_p)
10359 *cost += extra_cost->alu.arith_shift_reg;
10361 else if (speed_p)
10362 *cost += extra_cost->alu.arith_shift;
10363 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10364 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10365 return true;
10368 *cost = COSTS_N_INSNS (1);
10369 if (speed_p)
10370 *cost += extra_cost->alu.arith;
10371 if (CONST_INT_P (XEXP (x, 1))
10372 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10374 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10375 return true;
10377 return false;
10380 /* Vector mode? */
10382 *cost = LIBCALL_COST (2);
10383 return false;
10385 return true;
10387 case EQ:
10388 case NE:
10389 case LT:
10390 case LE:
10391 case GT:
10392 case GE:
10393 case LTU:
10394 case LEU:
10395 case GEU:
10396 case GTU:
10397 case ORDERED:
10398 case UNORDERED:
10399 case UNEQ:
10400 case UNLE:
10401 case UNLT:
10402 case UNGE:
10403 case UNGT:
10404 case LTGT:
10405 if (outer_code == SET)
10407 /* Is it a store-flag operation? */
10408 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10409 && XEXP (x, 1) == const0_rtx)
10411 /* Thumb also needs an IT insn. */
10412 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10413 return true;
10415 if (XEXP (x, 1) == const0_rtx)
10417 switch (code)
10419 case LT:
10420 /* LSR Rd, Rn, #31. */
10421 *cost = COSTS_N_INSNS (1);
10422 if (speed_p)
10423 *cost += extra_cost->alu.shift;
10424 break;
10426 case EQ:
10427 /* RSBS T1, Rn, #0
10428 ADC Rd, Rn, T1. */
10430 case NE:
10431 /* SUBS T1, Rn, #1
10432 SBC Rd, Rn, T1. */
10433 *cost = COSTS_N_INSNS (2);
10434 break;
10436 case LE:
10437 /* RSBS T1, Rn, Rn, LSR #31
10438 ADC Rd, Rn, T1. */
10439 *cost = COSTS_N_INSNS (2);
10440 if (speed_p)
10441 *cost += extra_cost->alu.arith_shift;
10442 break;
10444 case GT:
10445 /* RSB Rd, Rn, Rn, ASR #1
10446 LSR Rd, Rd, #31. */
10447 *cost = COSTS_N_INSNS (2);
10448 if (speed_p)
10449 *cost += (extra_cost->alu.arith_shift
10450 + extra_cost->alu.shift);
10451 break;
10453 case GE:
10454 /* ASR Rd, Rn, #31
10455 ADD Rd, Rn, #1. */
10456 *cost = COSTS_N_INSNS (2);
10457 if (speed_p)
10458 *cost += extra_cost->alu.shift;
10459 break;
10461 default:
10462 /* Remaining cases are either meaningless or would take
10463 three insns anyway. */
10464 *cost = COSTS_N_INSNS (3);
10465 break;
10467 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10468 return true;
10470 else
10472 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10473 if (CONST_INT_P (XEXP (x, 1))
10474 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10476 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10477 return true;
10480 return false;
10483 /* Not directly inside a set. If it involves the condition code
10484 register it must be the condition for a branch, cond_exec or
10485 I_T_E operation. Since the comparison is performed elsewhere
10486 this is just the control part which has no additional
10487 cost. */
10488 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10489 && XEXP (x, 1) == const0_rtx)
10491 *cost = 0;
10492 return true;
10494 return false;
10496 case ABS:
10497 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10498 && (mode == SFmode || !TARGET_VFP_SINGLE))
10500 *cost = COSTS_N_INSNS (1);
10501 if (speed_p)
10502 *cost += extra_cost->fp[mode != SFmode].neg;
10504 return false;
10506 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10508 *cost = LIBCALL_COST (1);
10509 return false;
10512 if (mode == SImode)
10514 *cost = COSTS_N_INSNS (1);
10515 if (speed_p)
10516 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10517 return false;
10519 /* Vector mode? */
10520 *cost = LIBCALL_COST (1);
10521 return false;
10523 case SIGN_EXTEND:
10524 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10525 && MEM_P (XEXP (x, 0)))
10527 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10529 if (mode == DImode)
10530 *cost += COSTS_N_INSNS (1);
10532 if (!speed_p)
10533 return true;
10535 if (GET_MODE (XEXP (x, 0)) == SImode)
10536 *cost += extra_cost->ldst.load;
10537 else
10538 *cost += extra_cost->ldst.load_sign_extend;
10540 if (mode == DImode)
10541 *cost += extra_cost->alu.shift;
10543 return true;
10546 /* Widening from less than 32-bits requires an extend operation. */
10547 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10549 /* We have SXTB/SXTH. */
10550 *cost = COSTS_N_INSNS (1);
10551 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10552 if (speed_p)
10553 *cost += extra_cost->alu.extend;
10555 else if (GET_MODE (XEXP (x, 0)) != SImode)
10557 /* Needs two shifts. */
10558 *cost = COSTS_N_INSNS (2);
10559 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10560 if (speed_p)
10561 *cost += 2 * extra_cost->alu.shift;
10564 /* Widening beyond 32-bits requires one more insn. */
10565 if (mode == DImode)
10567 *cost += COSTS_N_INSNS (1);
10568 if (speed_p)
10569 *cost += extra_cost->alu.shift;
10572 return true;
10574 case ZERO_EXTEND:
10575 if ((arm_arch4
10576 || GET_MODE (XEXP (x, 0)) == SImode
10577 || GET_MODE (XEXP (x, 0)) == QImode)
10578 && MEM_P (XEXP (x, 0)))
10580 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10582 if (mode == DImode)
10583 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10585 return true;
10588 /* Widening from less than 32-bits requires an extend operation. */
10589 if (GET_MODE (XEXP (x, 0)) == QImode)
10591 /* UXTB can be a shorter instruction in Thumb2, but it might
10592 be slower than the AND Rd, Rn, #255 alternative. When
10593 optimizing for speed it should never be slower to use
10594 AND, and we don't really model 16-bit vs 32-bit insns
10595 here. */
10596 *cost = COSTS_N_INSNS (1);
10597 if (speed_p)
10598 *cost += extra_cost->alu.logical;
10600 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10602 /* We have UXTB/UXTH. */
10603 *cost = COSTS_N_INSNS (1);
10604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10605 if (speed_p)
10606 *cost += extra_cost->alu.extend;
10608 else if (GET_MODE (XEXP (x, 0)) != SImode)
10610 /* Needs two shifts. It's marginally preferable to use
10611 shifts rather than two BIC instructions as the second
10612 shift may merge with a subsequent insn as a shifter
10613 op. */
10614 *cost = COSTS_N_INSNS (2);
10615 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10616 if (speed_p)
10617 *cost += 2 * extra_cost->alu.shift;
10619 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10620 *cost = COSTS_N_INSNS (1);
10622 /* Widening beyond 32-bits requires one more insn. */
10623 if (mode == DImode)
10625 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10628 return true;
10630 case CONST_INT:
10631 *cost = 0;
10632 /* CONST_INT has no mode, so we cannot tell for sure how many
10633 insns are really going to be needed. The best we can do is
10634 look at the value passed. If it fits in SImode, then assume
10635 that's the mode it will be used for. Otherwise assume it
10636 will be used in DImode. */
10637 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10638 mode = SImode;
10639 else
10640 mode = DImode;
10642 /* Avoid blowing up in arm_gen_constant (). */
10643 if (!(outer_code == PLUS
10644 || outer_code == AND
10645 || outer_code == IOR
10646 || outer_code == XOR
10647 || outer_code == MINUS))
10648 outer_code = SET;
10650 const_int_cost:
10651 if (mode == SImode)
10653 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10654 INTVAL (x), NULL, NULL,
10655 0, 0));
10656 /* Extra costs? */
10658 else
10660 *cost += COSTS_N_INSNS (arm_gen_constant
10661 (outer_code, SImode, NULL,
10662 trunc_int_for_mode (INTVAL (x), SImode),
10663 NULL, NULL, 0, 0)
10664 + arm_gen_constant (outer_code, SImode, NULL,
10665 INTVAL (x) >> 32, NULL,
10666 NULL, 0, 0));
10667 /* Extra costs? */
10670 return true;
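    /* Illustrative example of the DImode split above: a value such as
       0x100000001, which does not fit in SImode, is costed as two
       independent SImode constants.  arm_gen_constant is asked once for the
       low word (trunc_int_for_mode (0x100000001, SImode) == 1) and once for
       the high word (0x100000001 >> 32 == 1), and the two insn counts are
       summed.  */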
10672 case CONST:
10673 case LABEL_REF:
10674 case SYMBOL_REF:
10675 if (speed_p)
10677 if (arm_arch_thumb2 && !flag_pic)
10678 *cost = COSTS_N_INSNS (2);
10679 else
10680 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10682 else
10683 *cost = COSTS_N_INSNS (2);
10685 if (flag_pic)
10687 *cost += COSTS_N_INSNS (1);
10688 if (speed_p)
10689 *cost += extra_cost->alu.arith;
10692 return true;
10694 case CONST_FIXED:
10695 *cost = COSTS_N_INSNS (4);
10696 /* Fixme. */
10697 return true;
10699 case CONST_DOUBLE:
10700 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10701 && (mode == SFmode || !TARGET_VFP_SINGLE))
10703 if (vfp3_const_double_rtx (x))
10705 *cost = COSTS_N_INSNS (1);
10706 if (speed_p)
10707 *cost += extra_cost->fp[mode == DFmode].fpconst;
10708 return true;
10711 if (speed_p)
10713 *cost = COSTS_N_INSNS (1);
10714 if (mode == DFmode)
10715 *cost += extra_cost->ldst.loadd;
10716 else
10717 *cost += extra_cost->ldst.loadf;
10719 else
10720 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10722 return true;
10724 *cost = COSTS_N_INSNS (4);
10725 return true;
10727 case CONST_VECTOR:
10728 /* Fixme. */
10729 if (TARGET_NEON
10730 && TARGET_HARD_FLOAT
10731 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10732 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10733 *cost = COSTS_N_INSNS (1);
10734 else
10735 *cost = COSTS_N_INSNS (4);
10736 return true;
10738 case HIGH:
10739 case LO_SUM:
10740 *cost = COSTS_N_INSNS (1);
10741 /* When optimizing for size, we prefer constant pool entries to
10742 MOVW/MOVT pairs, so bump the cost of these slightly. */
10743 if (!speed_p)
10744 *cost += 1;
10745 return true;
10747 case CLZ:
10748 *cost = COSTS_N_INSNS (1);
10749 if (speed_p)
10750 *cost += extra_cost->alu.clz;
10751 return false;
10753 case SMIN:
10754 if (XEXP (x, 1) == const0_rtx)
10756 *cost = COSTS_N_INSNS (1);
10757 if (speed_p)
10758 *cost += extra_cost->alu.log_shift;
10759 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10760 return true;
10762 /* Fall through. */
10763 case SMAX:
10764 case UMIN:
10765 case UMAX:
10766 *cost = COSTS_N_INSNS (2);
10767 return false;
10769 case TRUNCATE:
10770 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10771 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10772 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10773 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10774 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10775 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10776 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10777 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10778 == ZERO_EXTEND))))
10780 *cost = COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->mult[1].extend;
10783 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10784 speed_p)
10785 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10786 0, speed_p));
10787 return true;
10789 *cost = LIBCALL_COST (1);
10790 return false;
10792 case UNSPEC:
10793 return arm_unspec_cost (x, outer_code, speed_p, cost);
10795 case PC:
10796 /* Reading the PC is like reading any other register. Writing it
10797 is more expensive, but we take that into account elsewhere. */
10798 *cost = 0;
10799 return true;
10801 case ZERO_EXTRACT:
10802 /* TODO: Simple zero_extract of bottom bits using AND. */
10803 /* Fall through. */
10804 case SIGN_EXTRACT:
10805 if (arm_arch6
10806 && mode == SImode
10807 && CONST_INT_P (XEXP (x, 1))
10808 && CONST_INT_P (XEXP (x, 2)))
10810 *cost = COSTS_N_INSNS (1);
10811 if (speed_p)
10812 *cost += extra_cost->alu.bfx;
10813 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10814 return true;
10816 /* Without UBFX/SBFX, need to resort to shift operations. */
10817 *cost = COSTS_N_INSNS (2);
10818 if (speed_p)
10819 *cost += 2 * extra_cost->alu.shift;
10820 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10821 return true;
10823 case FLOAT_EXTEND:
10824 if (TARGET_HARD_FLOAT)
10826 *cost = COSTS_N_INSNS (1);
10827 if (speed_p)
10828 *cost += extra_cost->fp[mode == DFmode].widen;
10829 if (!TARGET_FPU_ARMV8
10830 && GET_MODE (XEXP (x, 0)) == HFmode)
10832 /* Pre v8, widening HF->DF is a two-step process, first
10833 widening to SFmode. */
10834 *cost += COSTS_N_INSNS (1);
10835 if (speed_p)
10836 *cost += extra_cost->fp[0].widen;
10838 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10839 return true;
10842 *cost = LIBCALL_COST (1);
10843 return false;
10845 case FLOAT_TRUNCATE:
10846 if (TARGET_HARD_FLOAT)
10848 *cost = COSTS_N_INSNS (1);
10849 if (speed_p)
10850 *cost += extra_cost->fp[mode == DFmode].narrow;
10851 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10852 return true;
10853 /* Vector modes? */
10855 *cost = LIBCALL_COST (1);
10856 return false;
10858 case FMA:
10859 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10861 rtx op0 = XEXP (x, 0);
10862 rtx op1 = XEXP (x, 1);
10863 rtx op2 = XEXP (x, 2);
10865 *cost = COSTS_N_INSNS (1);
10867 /* vfms or vfnma. */
10868 if (GET_CODE (op0) == NEG)
10869 op0 = XEXP (op0, 0);
10871 /* vfnms or vfnma. */
10872 if (GET_CODE (op2) == NEG)
10873 op2 = XEXP (op2, 0);
10875 *cost += rtx_cost (op0, FMA, 0, speed_p);
10876 *cost += rtx_cost (op1, FMA, 1, speed_p);
10877 *cost += rtx_cost (op2, FMA, 2, speed_p);
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode == DFmode].fma;
10882 return true;
10885 *cost = LIBCALL_COST (3);
10886 return false;
10888 case FIX:
10889 case UNSIGNED_FIX:
10890 if (TARGET_HARD_FLOAT)
10892 if (GET_MODE_CLASS (mode) == MODE_INT)
10894 *cost = COSTS_N_INSNS (1);
10895 if (speed_p)
10896 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10897 /* Strip off the 'cost' of rounding towards zero. */
10898 if (GET_CODE (XEXP (x, 0)) == FIX)
10899 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10900 else
10901 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10902 /* ??? Increase the cost to deal with transferring from
10903 FP -> CORE registers? */
10904 return true;
10906 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10907 && TARGET_FPU_ARMV8)
10909 *cost = COSTS_N_INSNS (1);
10910 if (speed_p)
10911 *cost += extra_cost->fp[mode == DFmode].roundint;
10912 return false;
10914 /* Vector costs? */
10916 *cost = LIBCALL_COST (1);
10917 return false;
10919 case FLOAT:
10920 case UNSIGNED_FLOAT:
10921 if (TARGET_HARD_FLOAT)
10923 /* ??? Increase the cost to deal with transferring from CORE
10924 -> FP registers? */
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->fp[mode == DFmode].fromint;
10928 return false;
10930 *cost = LIBCALL_COST (1);
10931 return false;
10933 case CALL:
10934 *cost = COSTS_N_INSNS (1);
10935 return true;
10937 case ASM_OPERANDS:
10939 /* Just a guess. Guess number of instructions in the asm
10940 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10941 though (see PR60663). */
10942 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10943 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10945 *cost = COSTS_N_INSNS (asm_length + num_operands);
10946 return true;
10948 default:
10949 if (mode != VOIDmode)
10950 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10951 else
10952 *cost = COSTS_N_INSNS (4); /* Who knows? */
10953 return false;
10957 #undef HANDLE_NARROW_SHIFT_ARITH
10959 /* RTX costs.  Dispatch to the size or speed cost model as appropriate. */
10960 static bool
10961 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10962 int *total, bool speed)
10964 bool result;
10966 if (TARGET_OLD_RTX_COSTS
10967 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10969 /* Old way. (Deprecated.) */
10970 if (!speed)
10971 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10972 (enum rtx_code) outer_code, total);
10973 else
10974 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10975 (enum rtx_code) outer_code, total,
10976 speed);
10978 else
10980 /* New way. */
10981 if (current_tune->insn_extra_cost)
10982 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10983 (enum rtx_code) outer_code,
10984 current_tune->insn_extra_cost,
10985 total, speed);
10986 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10987 && current_tune->insn_extra_cost != NULL */
10988 else
10989 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10990 (enum rtx_code) outer_code,
10991 &generic_extra_costs, total, speed);
10994 if (dump_file && (dump_flags & TDF_DETAILS))
10996 print_rtl_single (dump_file, x);
10997 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10998 *total, result ? "final" : "partial");
11000 return result;
11003 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11004 supported on any "slowmul" cores, so it can be ignored. */
11006 static bool
11007 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11008 int *total, bool speed)
11010 machine_mode mode = GET_MODE (x);
11012 if (TARGET_THUMB)
11014 *total = thumb1_rtx_costs (x, code, outer_code);
11015 return true;
11018 switch (code)
11020 case MULT:
11021 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11022 || mode == DImode)
11024 *total = COSTS_N_INSNS (20);
11025 return false;
11028 if (CONST_INT_P (XEXP (x, 1)))
11030 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11031 & (unsigned HOST_WIDE_INT) 0xffffffff);
11032 int cost, const_ok = const_ok_for_arm (i);
11033 int j, booth_unit_size;
11035 /* Tune as appropriate. */
11036 cost = const_ok ? 4 : 8;
11037 booth_unit_size = 2;
11038 for (j = 0; i && j < 32; j += booth_unit_size)
11040 i >>= booth_unit_size;
11041 cost++;
11044 *total = COSTS_N_INSNS (cost);
11045 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11046 return true;
11049 *total = COSTS_N_INSNS (20);
11050 return false;
11052 default:
11053 return arm_rtx_costs_1 (x, outer_code, total, speed);
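/* Illustrative sketch (not part of the original source): a standalone
   restatement of the Booth-step cost estimate used in
   arm_slowmul_rtx_costs above.  The function name and plain C types are
   hypothetical; the real code works on HOST_WIDE_INT and folds the result
   into COSTS_N_INSNS.  */
static int
example_slowmul_mult_cost (unsigned long long multiplier, int const_ok)
{
  unsigned long long i = multiplier & 0xffffffffULL;
  int cost = const_ok ? 4 : 8;	/* Loading the constant; tune as appropriate.  */
  int j;

  /* The slow multiplier retires two bits (one Booth step) per cycle.  */
  for (j = 0; i && j < 32; j += 2)
    {
      i >>= 2;
      cost++;
    }
  return cost;	/* E.g. multiplier == 10 (0b1010), const_ok -> 4 + 2 == 6.  */
}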
11058 /* RTX cost for cores with a fast multiply unit (M variants). */
11060 static bool
11061 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11062 int *total, bool speed)
11064 machine_mode mode = GET_MODE (x);
11066 if (TARGET_THUMB1)
11068 *total = thumb1_rtx_costs (x, code, outer_code);
11069 return true;
11072 /* ??? should thumb2 use different costs? */
11073 switch (code)
11075 case MULT:
11076 /* There is no point basing this on the tuning, since it is always the
11077 fast variant if it exists at all. */
11078 if (mode == DImode
11079 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11080 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11081 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11083 *total = COSTS_N_INSNS(2);
11084 return false;
11088 if (mode == DImode)
11090 *total = COSTS_N_INSNS (5);
11091 return false;
11094 if (CONST_INT_P (XEXP (x, 1)))
11096 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11097 & (unsigned HOST_WIDE_INT) 0xffffffff);
11098 int cost, const_ok = const_ok_for_arm (i);
11099 int j, booth_unit_size;
11101 /* Tune as appropriate. */
11102 cost = const_ok ? 4 : 8;
11103 booth_unit_size = 8;
11104 for (j = 0; i && j < 32; j += booth_unit_size)
11106 i >>= booth_unit_size;
11107 cost++;
11110 *total = COSTS_N_INSNS(cost);
11111 return false;
11114 if (mode == SImode)
11116 *total = COSTS_N_INSNS (4);
11117 return false;
11120 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11122 if (TARGET_HARD_FLOAT
11123 && (mode == SFmode
11124 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11126 *total = COSTS_N_INSNS (1);
11127 return false;
11131 /* Requires a lib call */
11132 *total = COSTS_N_INSNS (20);
11133 return false;
11135 default:
11136 return arm_rtx_costs_1 (x, outer_code, total, speed);
11141 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11142 so it can be ignored. */
11144 static bool
11145 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11146 int *total, bool speed)
11148 machine_mode mode = GET_MODE (x);
11150 if (TARGET_THUMB)
11152 *total = thumb1_rtx_costs (x, code, outer_code);
11153 return true;
11156 switch (code)
11158 case COMPARE:
11159 if (GET_CODE (XEXP (x, 0)) != MULT)
11160 return arm_rtx_costs_1 (x, outer_code, total, speed);
11162 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11163 will stall until the multiplication is complete. */
11164 *total = COSTS_N_INSNS (3);
11165 return false;
11167 case MULT:
11168 /* There is no point basing this on the tuning, since it is always the
11169 fast variant if it exists at all. */
11170 if (mode == DImode
11171 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11172 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11173 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11175 *total = COSTS_N_INSNS (2);
11176 return false;
11180 if (mode == DImode)
11182 *total = COSTS_N_INSNS (5);
11183 return false;
11186 if (CONST_INT_P (XEXP (x, 1)))
11188 /* If operand 1 is a constant we can more accurately
11189 calculate the cost of the multiply. The multiplier can
11190 retire 15 bits on the first cycle and a further 12 on the
11191 second. We do, of course, have to load the constant into
11192 a register first. */
11193 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11194 /* There's a general overhead of one cycle. */
11195 int cost = 1;
11196 unsigned HOST_WIDE_INT masked_const;
11198 if (i & 0x80000000)
11199 i = ~i;
11201 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11203 masked_const = i & 0xffff8000;
11204 if (masked_const != 0)
11206 cost++;
11207 masked_const = i & 0xf8000000;
11208 if (masked_const != 0)
11209 cost++;
11211 *total = COSTS_N_INSNS (cost);
11212 return false;
11215 if (mode == SImode)
11217 *total = COSTS_N_INSNS (3);
11218 return false;
11221 /* Requires a lib call */
11222 *total = COSTS_N_INSNS (20);
11223 return false;
11225 default:
11226 return arm_rtx_costs_1 (x, outer_code, total, speed);
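/* Illustrative sketch (not part of the original source): the XScale
   multiply costing above restated as a standalone function.  The name and
   types are hypothetical; the real code operates on INTVAL (XEXP (x, 1)).  */
static int
example_xscale_mult_cost (unsigned long long multiplier)
{
  unsigned long long i = multiplier;
  int cost = 1;			/* General one-cycle overhead.  */

  if (i & 0x80000000)		/* Negative values cost as their complement.  */
    i = ~i;
  i &= 0xffffffffULL;

  if ((i & 0xffff8000) != 0)	/* Bits not retired in the first cycle.  */
    cost++;
  if ((i & 0xf8000000) != 0)	/* Bits not retired in the second cycle.  */
    cost++;

  return cost;		/* E.g. 0x4000 -> 1, 0x12345 -> 2, 0x12345678 -> 3.  */
}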
11231 /* RTX costs for 9e (and later) cores. */
11233 static bool
11234 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11235 int *total, bool speed)
11237 machine_mode mode = GET_MODE (x);
11239 if (TARGET_THUMB1)
11241 switch (code)
11243 case MULT:
11244 /* Small multiply: 32 cycles for an integer multiply inst. */
11245 if (arm_arch6m && arm_m_profile_small_mul)
11246 *total = COSTS_N_INSNS (32);
11247 else
11248 *total = COSTS_N_INSNS (3);
11249 return true;
11251 default:
11252 *total = thumb1_rtx_costs (x, code, outer_code);
11253 return true;
11257 switch (code)
11259 case MULT:
11260 /* There is no point basing this on the tuning, since it is always the
11261 fast variant if it exists at all. */
11262 if (mode == DImode
11263 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11264 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11265 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11267 *total = COSTS_N_INSNS (2);
11268 return false;
11272 if (mode == DImode)
11274 *total = COSTS_N_INSNS (5);
11275 return false;
11278 if (mode == SImode)
11280 *total = COSTS_N_INSNS (2);
11281 return false;
11284 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11286 if (TARGET_HARD_FLOAT
11287 && (mode == SFmode
11288 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11290 *total = COSTS_N_INSNS (1);
11291 return false;
11295 *total = COSTS_N_INSNS (20);
11296 return false;
11298 default:
11299 return arm_rtx_costs_1 (x, outer_code, total, speed);
11302 /* All address computations that can be done are free, but rtx cost returns
11303 the same for practically all of them. So we weight the different types
11304 of address here in the order (most pref first):
11305 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11306 static inline int
11307 arm_arm_address_cost (rtx x)
11309 enum rtx_code c = GET_CODE (x);
11311 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11312 return 0;
11313 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11314 return 10;
11316 if (c == PLUS)
11318 if (CONST_INT_P (XEXP (x, 1)))
11319 return 2;
11321 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11322 return 3;
11324 return 4;
11327 return 6;
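/* Illustrative reading of the weights above: a post-increment address such
   as (post_inc (reg)) costs 0, (plus (reg) (const_int 4)) costs 2,
   (plus (reg) (mult (reg) (const_int 4))) costs 3, (plus (reg) (reg))
   costs 4, a bare (reg) costs 6, and a SYMBOL_REF or LABEL_REF (e.g. a
   constant-pool reference) costs 10.  */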
11330 static inline int
11331 arm_thumb_address_cost (rtx x)
11333 enum rtx_code c = GET_CODE (x);
11335 if (c == REG)
11336 return 1;
11337 if (c == PLUS
11338 && REG_P (XEXP (x, 0))
11339 && CONST_INT_P (XEXP (x, 1)))
11340 return 1;
11342 return 2;
11345 static int
11346 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11347 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11349 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11352 /* Adjust cost hook for XScale. */
11353 static bool
11354 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11356 /* Some true dependencies can have a higher cost depending
11357 on precisely how certain input operands are used. */
11358 if (REG_NOTE_KIND(link) == 0
11359 && recog_memoized (insn) >= 0
11360 && recog_memoized (dep) >= 0)
11362 int shift_opnum = get_attr_shift (insn);
11363 enum attr_type attr_type = get_attr_type (dep);
11365 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11366 operand for INSN. If we have a shifted input operand and the
11367 instruction we depend on is another ALU instruction, then we may
11368 have to account for an additional stall. */
11369 if (shift_opnum != 0
11370 && (attr_type == TYPE_ALU_SHIFT_IMM
11371 || attr_type == TYPE_ALUS_SHIFT_IMM
11372 || attr_type == TYPE_LOGIC_SHIFT_IMM
11373 || attr_type == TYPE_LOGICS_SHIFT_IMM
11374 || attr_type == TYPE_ALU_SHIFT_REG
11375 || attr_type == TYPE_ALUS_SHIFT_REG
11376 || attr_type == TYPE_LOGIC_SHIFT_REG
11377 || attr_type == TYPE_LOGICS_SHIFT_REG
11378 || attr_type == TYPE_MOV_SHIFT
11379 || attr_type == TYPE_MVN_SHIFT
11380 || attr_type == TYPE_MOV_SHIFT_REG
11381 || attr_type == TYPE_MVN_SHIFT_REG))
11383 rtx shifted_operand;
11384 int opno;
11386 /* Get the shifted operand. */
11387 extract_insn (insn);
11388 shifted_operand = recog_data.operand[shift_opnum];
11390 /* Iterate over all the operands in DEP. If we write an operand
11391 that overlaps with SHIFTED_OPERAND, then we have to increase the
11392 cost of this dependency. */
11393 extract_insn (dep);
11394 preprocess_constraints (dep);
11395 for (opno = 0; opno < recog_data.n_operands; opno++)
11397 /* We can ignore strict inputs. */
11398 if (recog_data.operand_type[opno] == OP_IN)
11399 continue;
11401 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11402 shifted_operand))
11404 *cost = 2;
11405 return false;
11410 return true;
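/* Illustrative example (hypothetical register numbers): on XScale a
   sequence such as

	add	r1, r2, r3
	add	r0, r4, r1, lsl #2

   matches the case handled above.  The second ALU insn consumes R1 through
   its shifter operand while R1 is produced by another ALU insn, so the
   dependency cost is set to 2 to model the extra stall.  */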
11413 /* Adjust cost hook for Cortex A9. */
11414 static bool
11415 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11417 switch (REG_NOTE_KIND (link))
11419 case REG_DEP_ANTI:
11420 *cost = 0;
11421 return false;
11423 case REG_DEP_TRUE:
11424 case REG_DEP_OUTPUT:
11425 if (recog_memoized (insn) >= 0
11426 && recog_memoized (dep) >= 0)
11428 if (GET_CODE (PATTERN (insn)) == SET)
11430 if (GET_MODE_CLASS
11431 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11432 || GET_MODE_CLASS
11433 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11435 enum attr_type attr_type_insn = get_attr_type (insn);
11436 enum attr_type attr_type_dep = get_attr_type (dep);
11438 /* By default all dependencies of the form
11439 s0 = s0 <op> s1
11440 s0 = s0 <op> s2
11441 have an extra latency of 1 cycle because
11442 of the input and output dependency in this
11443 case. However this gets modeled as a true
11444 dependency and hence all these checks. */
11445 if (REG_P (SET_DEST (PATTERN (insn)))
11446 && REG_P (SET_DEST (PATTERN (dep)))
11447 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11448 SET_DEST (PATTERN (dep))))
11450 /* FMACS is a special case where the dependent
11451 instruction can be issued 3 cycles before
11452 the normal latency in case of an output
11453 dependency. */
11454 if ((attr_type_insn == TYPE_FMACS
11455 || attr_type_insn == TYPE_FMACD)
11456 && (attr_type_dep == TYPE_FMACS
11457 || attr_type_dep == TYPE_FMACD))
11459 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11460 *cost = insn_default_latency (dep) - 3;
11461 else
11462 *cost = insn_default_latency (dep);
11463 return false;
11465 else
11467 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11468 *cost = insn_default_latency (dep) + 1;
11469 else
11470 *cost = insn_default_latency (dep);
11472 return false;
11477 break;
11479 default:
11480 gcc_unreachable ();
11483 return true;
11486 /* Adjust cost hook for FA726TE. */
11487 static bool
11488 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11490 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11491 has a penalty of 3. */
11492 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11493 && recog_memoized (insn) >= 0
11494 && recog_memoized (dep) >= 0
11495 && get_attr_conds (dep) == CONDS_SET)
11497 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11498 if (get_attr_conds (insn) == CONDS_USE
11499 && get_attr_type (insn) != TYPE_BRANCH)
11501 *cost = 3;
11502 return false;
11505 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11506 || get_attr_conds (insn) == CONDS_USE)
11508 *cost = 0;
11509 return false;
11513 return true;
11516 /* Implement TARGET_REGISTER_MOVE_COST.
11518 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11519 it is typically more expensive than a single memory access. We set
11520 the cost to less than two memory accesses so that floating
11521 point to integer conversion does not go through memory. */
11524 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11525 reg_class_t from, reg_class_t to)
11527 if (TARGET_32BIT)
11529 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11530 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11531 return 15;
11532 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11533 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11534 return 4;
11535 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11536 return 20;
11537 else
11538 return 2;
11540 else
11542 if (from == HI_REGS || to == HI_REGS)
11543 return 4;
11544 else
11545 return 2;
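/* Illustrative reading of the table above for TARGET_32BIT: a
   GENERAL_REGS <-> VFP_REGS move is costed at 15 (more than one memory
   access, less than two), IWMMXT_REGS <-> core moves at 4, anything
   involving IWMMXT_GR_REGS at 20, and ordinary core-to-core moves at 2.  */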
11549 /* Implement TARGET_MEMORY_MOVE_COST. */
11552 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11553 bool in ATTRIBUTE_UNUSED)
11555 if (TARGET_32BIT)
11556 return 10;
11557 else
11559 if (GET_MODE_SIZE (mode) < 4)
11560 return 8;
11561 else
11562 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
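/* Illustrative worked example: for TARGET_32BIT the memory move cost is a
   flat 10.  On Thumb-1 a sub-word value costs 8, while wider modes cost
   2 * GET_MODE_SIZE (mode), doubled again outside LO_REGS; e.g. DImode is
   2 * 8 * 1 == 16 into a low register but 2 * 8 * 2 == 32 otherwise.  */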
11566 /* Vectorizer cost model implementation. */
11568 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11569 static int
11570 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11571 tree vectype,
11572 int misalign ATTRIBUTE_UNUSED)
11574 unsigned elements;
11576 switch (type_of_cost)
11578 case scalar_stmt:
11579 return current_tune->vec_costs->scalar_stmt_cost;
11581 case scalar_load:
11582 return current_tune->vec_costs->scalar_load_cost;
11584 case scalar_store:
11585 return current_tune->vec_costs->scalar_store_cost;
11587 case vector_stmt:
11588 return current_tune->vec_costs->vec_stmt_cost;
11590 case vector_load:
11591 return current_tune->vec_costs->vec_align_load_cost;
11593 case vector_store:
11594 return current_tune->vec_costs->vec_store_cost;
11596 case vec_to_scalar:
11597 return current_tune->vec_costs->vec_to_scalar_cost;
11599 case scalar_to_vec:
11600 return current_tune->vec_costs->scalar_to_vec_cost;
11602 case unaligned_load:
11603 return current_tune->vec_costs->vec_unalign_load_cost;
11605 case unaligned_store:
11606 return current_tune->vec_costs->vec_unalign_store_cost;
11608 case cond_branch_taken:
11609 return current_tune->vec_costs->cond_taken_branch_cost;
11611 case cond_branch_not_taken:
11612 return current_tune->vec_costs->cond_not_taken_branch_cost;
11614 case vec_perm:
11615 case vec_promote_demote:
11616 return current_tune->vec_costs->vec_stmt_cost;
11618 case vec_construct:
11619 elements = TYPE_VECTOR_SUBPARTS (vectype);
11620 return elements / 2 + 1;
11622 default:
11623 gcc_unreachable ();
11627 /* Implement targetm.vectorize.add_stmt_cost. */
11629 static unsigned
11630 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11631 struct _stmt_vec_info *stmt_info, int misalign,
11632 enum vect_cost_model_location where)
11634 unsigned *cost = (unsigned *) data;
11635 unsigned retval = 0;
11637 if (flag_vect_cost_model)
11639 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11640 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11642 /* Statements in an inner loop relative to the loop being
11643 vectorized are weighted more heavily. The value here is
11644 arbitrary and could potentially be improved with analysis. */
11645 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11646 count *= 50; /* FIXME. */
11648 retval = (unsigned) (count * stmt_cost);
11649 cost[where] += retval;
11652 return retval;
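/* Illustrative example (hypothetical per-statement cost): if a statement in
   the inner loop of the loop being vectorized has a per-statement cost of 1
   and count == 3, the code above records 3 * 50 * 1 == 150 in
   cost[vect_body], whereas the same statement outside an inner loop would
   add just 3.  */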
11655 /* Return true if and only if this insn can dual-issue only as older. */
11656 static bool
11657 cortexa7_older_only (rtx_insn *insn)
11659 if (recog_memoized (insn) < 0)
11660 return false;
11662 switch (get_attr_type (insn))
11664 case TYPE_ALU_DSP_REG:
11665 case TYPE_ALU_SREG:
11666 case TYPE_ALUS_SREG:
11667 case TYPE_LOGIC_REG:
11668 case TYPE_LOGICS_REG:
11669 case TYPE_ADC_REG:
11670 case TYPE_ADCS_REG:
11671 case TYPE_ADR:
11672 case TYPE_BFM:
11673 case TYPE_REV:
11674 case TYPE_MVN_REG:
11675 case TYPE_SHIFT_IMM:
11676 case TYPE_SHIFT_REG:
11677 case TYPE_LOAD_BYTE:
11678 case TYPE_LOAD1:
11679 case TYPE_STORE1:
11680 case TYPE_FFARITHS:
11681 case TYPE_FADDS:
11682 case TYPE_FFARITHD:
11683 case TYPE_FADDD:
11684 case TYPE_FMOV:
11685 case TYPE_F_CVT:
11686 case TYPE_FCMPS:
11687 case TYPE_FCMPD:
11688 case TYPE_FCONSTS:
11689 case TYPE_FCONSTD:
11690 case TYPE_FMULS:
11691 case TYPE_FMACS:
11692 case TYPE_FMULD:
11693 case TYPE_FMACD:
11694 case TYPE_FDIVS:
11695 case TYPE_FDIVD:
11696 case TYPE_F_MRC:
11697 case TYPE_F_MRRC:
11698 case TYPE_F_FLAG:
11699 case TYPE_F_LOADS:
11700 case TYPE_F_STORES:
11701 return true;
11702 default:
11703 return false;
11707 /* Return true if and only if this insn can dual-issue as younger. */
11708 static bool
11709 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11711 if (recog_memoized (insn) < 0)
11713 if (verbose > 5)
11714 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11715 return false;
11718 switch (get_attr_type (insn))
11720 case TYPE_ALU_IMM:
11721 case TYPE_ALUS_IMM:
11722 case TYPE_LOGIC_IMM:
11723 case TYPE_LOGICS_IMM:
11724 case TYPE_EXTEND:
11725 case TYPE_MVN_IMM:
11726 case TYPE_MOV_IMM:
11727 case TYPE_MOV_REG:
11728 case TYPE_MOV_SHIFT:
11729 case TYPE_MOV_SHIFT_REG:
11730 case TYPE_BRANCH:
11731 case TYPE_CALL:
11732 return true;
11733 default:
11734 return false;
11739 /* Look for an instruction that can dual issue only as an older
11740 instruction, and move it in front of any instructions that can
11741 dual-issue as younger, while preserving the relative order of all
11742 other instructions in the ready list. This is a heuristic to help
11743 dual-issue in later cycles, by postponing issue of more flexible
11744 instructions. This heuristic may affect dual issue opportunities
11745 in the current cycle. */
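/* Illustrative example (not from the original sources): if the head of
   the ready list is a move of an immediate (which can dual-issue as
   younger) and the next entry is an ADD of two registers (older-only),
   the loop below records the move as FIRST_YOUNGER and the ADD as
   FIRST_OLDER_ONLY, and the ADD is then moved in front of the move so
   that the more flexible instruction stays available for pairing in a
   later cycle, in line with the rationale above.  */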
11746 static void
11747 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11748 int *n_readyp, int clock)
11750 int i;
11751 int first_older_only = -1, first_younger = -1;
11753 if (verbose > 5)
11754 fprintf (file,
11755 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11756 clock,
11757 *n_readyp);
11759 /* Traverse the ready list from the head (the instruction to issue
11760 first), looking for the first instruction that can issue as
11761 younger and the first instruction that can dual-issue only as
11762 older. */
11763 for (i = *n_readyp - 1; i >= 0; i--)
11765 rtx_insn *insn = ready[i];
11766 if (cortexa7_older_only (insn))
11768 first_older_only = i;
11769 if (verbose > 5)
11770 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11771 break;
11773 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11774 first_younger = i;
11777 /* Nothing to reorder because either no younger insn found or insn
11778 that can dual-issue only as older appears before any insn that
11779 can dual-issue as younger. */
11780 if (first_younger == -1)
11782 if (verbose > 5)
11783 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11784 return;
11787 /* Nothing to reorder because no older-only insn in the ready list. */
11788 if (first_older_only == -1)
11790 if (verbose > 5)
11791 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11792 return;
11795 /* Move first_older_only insn before first_younger. */
11796 if (verbose > 5)
11797 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11798 INSN_UID(ready [first_older_only]),
11799 INSN_UID(ready [first_younger]));
11800 rtx_insn *first_older_only_insn = ready [first_older_only];
11801 for (i = first_older_only; i < first_younger; i++)
11803 ready[i] = ready[i+1];
11806 ready[i] = first_older_only_insn;
11807 return;
11810 /* Implement TARGET_SCHED_REORDER. */
11811 static int
11812 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11813 int clock)
11815 switch (arm_tune)
11817 case cortexa7:
11818 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11819 break;
11820 default:
11821 /* Do nothing for other cores. */
11822 break;
11825 return arm_issue_rate ();
11828 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11829 It corrects the value of COST based on the relationship between
11830 INSN and DEP through the dependence LINK. It returns the new
11831 value. There is a per-core adjust_cost hook to adjust scheduler costs
11832 and the per-core hook can choose to completely override the generic
11833 adjust_cost function. Only put bits of code into arm_adjust_cost that
11834 are common across all cores. */
11835 static int
11836 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11838 rtx i_pat, d_pat;
11840 /* When generating Thumb-1 code, we want to place flag-setting operations
11841 close to a conditional branch which depends on them, so that we can
11842 omit the comparison. */
11843 if (TARGET_THUMB1
11844 && REG_NOTE_KIND (link) == 0
11845 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11846 && recog_memoized (dep) >= 0
11847 && get_attr_conds (dep) == CONDS_SET)
11848 return 0;
11850 if (current_tune->sched_adjust_cost != NULL)
11852 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11853 return cost;
11856 /* XXX Is this strictly true? */
11857 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11858 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11859 return 0;
11861 /* Call insns don't incur a stall, even if they follow a load. */
11862 if (REG_NOTE_KIND (link) == 0
11863 && CALL_P (insn))
11864 return 1;
11866 if ((i_pat = single_set (insn)) != NULL
11867 && MEM_P (SET_SRC (i_pat))
11868 && (d_pat = single_set (dep)) != NULL
11869 && MEM_P (SET_DEST (d_pat)))
11871 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12872 /* This is a load after a store; there is no conflict if the load reads
11873 from a cached area. Assume that loads from the stack, and from the
11874 constant pool are cached, and that others will miss. This is a
11875 hack. */
11877 if ((GET_CODE (src_mem) == SYMBOL_REF
11878 && CONSTANT_POOL_ADDRESS_P (src_mem))
11879 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11880 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11881 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11882 return 1;
11885 return cost;
11889 arm_max_conditional_execute (void)
11891 return max_insns_skipped;
11894 static int
11895 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11897 if (TARGET_32BIT)
11898 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11899 else
11900 return (optimize > 0) ? 2 : 0;
11903 static int
11904 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11906 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11909 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11910 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11911 sequences of non-executed instructions in IT blocks probably take the same
11912 amount of time as executed instructions (and the IT instruction itself takes
11913 space in icache). This function was experimentally determined to give good
11914 results on a popular embedded benchmark. */
11916 static int
11917 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11919 return (TARGET_32BIT && speed_p) ? 1
11920 : arm_default_branch_cost (speed_p, predictable_p);
11923 static int
11924 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11926 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11929 static bool fp_consts_inited = false;
11931 static REAL_VALUE_TYPE value_fp0;
11933 static void
11934 init_fp_table (void)
11936 REAL_VALUE_TYPE r;
11938 r = REAL_VALUE_ATOF ("0", DFmode);
11939 value_fp0 = r;
11940 fp_consts_inited = true;
11943 /* Return TRUE if rtx X is a valid immediate FP constant. */
11945 arm_const_double_rtx (rtx x)
11947 REAL_VALUE_TYPE r;
11949 if (!fp_consts_inited)
11950 init_fp_table ();
11952 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11953 if (REAL_VALUE_MINUS_ZERO (r))
11954 return 0;
11956 if (REAL_VALUES_EQUAL (r, value_fp0))
11957 return 1;
11959 return 0;
11962 /* VFPv3 has a fairly wide range of representable immediates, formed from
11963 "quarter-precision" floating-point values. These can be evaluated using this
11964 formula (with ^ for exponentiation):
11966 -1^s * n * 2^-r
11968 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11969 16 <= n <= 31 and 0 <= r <= 7.
11971 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11973 - A (most-significant) is the sign bit.
11974 - BCD are the exponent (encoded as r XOR 3).
11975 - EFGH are the mantissa (encoded as n - 16).
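/* Worked example, derived from the encoding description above (added for
   illustration): 1.0 = 16 * 2^-4, so s = 0, r = 4 and n = 16, giving
   A = 0, BCD = 4 ^ 3 = 0b111, EFGH = 16 - 16 = 0b0000, i.e. index 0x70.
   Likewise 0.5 = 16 * 2^-5 encodes as 0x60, and -31.0 = -(31 * 2^0)
   encodes as A = 1, BCD = 0 ^ 3 = 0b011, EFGH = 31 - 16 = 0b1111,
   i.e. 0xbf.  */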
11978 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11979 fconst[sd] instruction, or -1 if X isn't suitable. */
11980 static int
11981 vfp3_const_double_index (rtx x)
11983 REAL_VALUE_TYPE r, m;
11984 int sign, exponent;
11985 unsigned HOST_WIDE_INT mantissa, mant_hi;
11986 unsigned HOST_WIDE_INT mask;
11987 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11988 bool fail;
11990 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11991 return -1;
11993 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11995 /* We can't represent these things, so detect them first. */
11996 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11997 return -1;
11999 /* Extract sign, exponent and mantissa. */
12000 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12001 r = real_value_abs (&r);
12002 exponent = REAL_EXP (&r);
12003 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12004 highest (sign) bit, with a fixed binary point at bit point_pos.
12005 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12006 bits for the mantissa, this may fail (low bits would be lost). */
12007 real_ldexp (&m, &r, point_pos - exponent);
12008 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12009 mantissa = w.elt (0);
12010 mant_hi = w.elt (1);
12012 /* If there are bits set in the low part of the mantissa, we can't
12013 represent this value. */
12014 if (mantissa != 0)
12015 return -1;
12017 /* Now make it so that mantissa contains the most-significant bits, and move
12018 the point_pos to indicate that the least-significant bits have been
12019 discarded. */
12020 point_pos -= HOST_BITS_PER_WIDE_INT;
12021 mantissa = mant_hi;
12023 /* We can permit four significant bits of mantissa only, plus a high bit
12024 which is always 1. */
12025 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12026 if ((mantissa & mask) != 0)
12027 return -1;
12029 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12030 mantissa >>= point_pos - 5;
12032 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12033 floating-point immediate zero with Neon using an integer-zero load, but
12034 that case is handled elsewhere.) */
12035 if (mantissa == 0)
12036 return -1;
12038 gcc_assert (mantissa >= 16 && mantissa <= 31);
12040 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12041 normalized significands are in the range [1, 2). (Our mantissa is shifted
12042 left 4 places at this point relative to normalized IEEE754 values). GCC
12043 internally uses [0.5, 1) (see real.c), so the exponent returned from
12044 REAL_EXP must be altered. */
12045 exponent = 5 - exponent;
12047 if (exponent < 0 || exponent > 7)
12048 return -1;
12050 /* Sign, mantissa and exponent are now in the correct form to plug into the
12051 formula described in the comment above. */
12052 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12055 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12057 vfp3_const_double_rtx (rtx x)
12059 if (!TARGET_VFP3)
12060 return 0;
12062 return vfp3_const_double_index (x) != -1;
12065 /* Recognize immediates which can be used in various Neon instructions. Legal
12066 immediates are described by the following table (for VMVN variants, the
12067 bitwise inverse of the constant shown is recognized. In either case, VMOV
12068 is output and the correct instruction to use for a given constant is chosen
12069 by the assembler). The constant shown is replicated across all elements of
12070 the destination vector.
12072 insn elems variant constant (binary)
12073 ---- ----- ------- -----------------
12074 vmov i32 0 00000000 00000000 00000000 abcdefgh
12075 vmov i32 1 00000000 00000000 abcdefgh 00000000
12076 vmov i32 2 00000000 abcdefgh 00000000 00000000
12077 vmov i32 3 abcdefgh 00000000 00000000 00000000
12078 vmov i16 4 00000000 abcdefgh
12079 vmov i16 5 abcdefgh 00000000
12080 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12081 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12082 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12083 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12084 vmvn i16 10 00000000 abcdefgh
12085 vmvn i16 11 abcdefgh 00000000
12086 vmov i32 12 00000000 00000000 abcdefgh 11111111
12087 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12088 vmov i32 14 00000000 abcdefgh 11111111 11111111
12089 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12090 vmov i8 16 abcdefgh
12091 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12092 eeeeeeee ffffffff gggggggg hhhhhhhh
12093 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12094 vmov f32 19 00000000 00000000 00000000 00000000
12096 For case 18, B = !b. Representable values are exactly those accepted by
12097 vfp3_const_double_index, but are output as floating-point numbers rather
12098 than indices.
12100 For case 19, we will change it to vmov.i32 when assembling.
12102 Variants 0-5 (inclusive) may also be used as immediates for the second
12103 operand of VORR/VBIC instructions.
12105 The INVERSE argument causes the bitwise inverse of the given operand to be
12106 recognized instead (used for recognizing legal immediates for the VAND/VORN
12107 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12108 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12109 output, rather than the real insns vbic/vorr).
12111 INVERSE makes no difference to the recognition of float vectors.
12113 The return value is the variant of immediate as shown in the above table, or
12114 -1 if the given value doesn't match any of the listed patterns.
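/* Illustrative example (added, not part of the original comment): a
   V4SImode CONST_VECTOR whose elements are all 0x000000ff splats to the
   byte pattern ff 00 00 00 repeated, which matches variant 0 above, so
   the function returns 0 with *ELEMENTWIDTH = 32 and *MODCONST = 0xff,
   suitable for something like "vmov.i32 qN, #0xff".  An element value
   such as 0x12345678 matches none of the rows and yields -1.  */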
12116 static int
12117 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12118 rtx *modconst, int *elementwidth)
12120 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12121 matches = 1; \
12122 for (i = 0; i < idx; i += (STRIDE)) \
12123 if (!(TEST)) \
12124 matches = 0; \
12125 if (matches) \
12127 immtype = (CLASS); \
12128 elsize = (ELSIZE); \
12129 break; \
12132 unsigned int i, elsize = 0, idx = 0, n_elts;
12133 unsigned int innersize;
12134 unsigned char bytes[16];
12135 int immtype = -1, matches;
12136 unsigned int invmask = inverse ? 0xff : 0;
12137 bool vector = GET_CODE (op) == CONST_VECTOR;
12139 if (vector)
12141 n_elts = CONST_VECTOR_NUNITS (op);
12142 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12144 else
12146 n_elts = 1;
12147 if (mode == VOIDmode)
12148 mode = DImode;
12149 innersize = GET_MODE_SIZE (mode);
12152 /* Vectors of float constants. */
12153 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12155 rtx el0 = CONST_VECTOR_ELT (op, 0);
12156 REAL_VALUE_TYPE r0;
12158 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12159 return -1;
12161 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12163 for (i = 1; i < n_elts; i++)
12165 rtx elt = CONST_VECTOR_ELT (op, i);
12166 REAL_VALUE_TYPE re;
12168 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12170 if (!REAL_VALUES_EQUAL (r0, re))
12171 return -1;
12174 if (modconst)
12175 *modconst = CONST_VECTOR_ELT (op, 0);
12177 if (elementwidth)
12178 *elementwidth = 0;
12180 if (el0 == CONST0_RTX (GET_MODE (el0)))
12181 return 19;
12182 else
12183 return 18;
12186 /* Splat vector constant out into a byte vector. */
12187 for (i = 0; i < n_elts; i++)
12189 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12190 unsigned HOST_WIDE_INT elpart;
12191 unsigned int part, parts;
12193 if (CONST_INT_P (el))
12195 elpart = INTVAL (el);
12196 parts = 1;
12198 else if (CONST_DOUBLE_P (el))
12200 elpart = CONST_DOUBLE_LOW (el);
12201 parts = 2;
12203 else
12204 gcc_unreachable ();
12206 for (part = 0; part < parts; part++)
12208 unsigned int byte;
12209 for (byte = 0; byte < innersize; byte++)
12211 bytes[idx++] = (elpart & 0xff) ^ invmask;
12212 elpart >>= BITS_PER_UNIT;
12214 if (CONST_DOUBLE_P (el))
12215 elpart = CONST_DOUBLE_HIGH (el);
12219 /* Sanity check. */
12220 gcc_assert (idx == GET_MODE_SIZE (mode));
12224 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12225 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12227 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12228 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12230 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12231 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12233 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12234 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12236 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12238 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12240 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12241 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12243 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12244 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12246 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12247 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12249 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12250 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12252 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12254 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12256 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12257 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12259 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12260 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12262 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12263 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12265 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12266 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12268 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12270 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12271 && bytes[i] == bytes[(i + 8) % idx]);
12273 while (0);
12275 if (immtype == -1)
12276 return -1;
12278 if (elementwidth)
12279 *elementwidth = elsize;
12281 if (modconst)
12283 unsigned HOST_WIDE_INT imm = 0;
12285 /* Un-invert bytes of recognized vector, if necessary. */
12286 if (invmask != 0)
12287 for (i = 0; i < idx; i++)
12288 bytes[i] ^= invmask;
12290 if (immtype == 17)
12292 /* FIXME: Broken on 32-bit H_W_I hosts. */
12293 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12295 for (i = 0; i < 8; i++)
12296 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12297 << (i * BITS_PER_UNIT);
12299 *modconst = GEN_INT (imm);
12301 else
12303 unsigned HOST_WIDE_INT imm = 0;
12305 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12306 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12308 *modconst = GEN_INT (imm);
12312 return immtype;
12313 #undef CHECK
12316 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12317 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12318 float elements), and a modified constant (whatever should be output for a
12319 VMOV) in *MODCONST. */
12322 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12323 rtx *modconst, int *elementwidth)
12325 rtx tmpconst;
12326 int tmpwidth;
12327 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12329 if (retval == -1)
12330 return 0;
12332 if (modconst)
12333 *modconst = tmpconst;
12335 if (elementwidth)
12336 *elementwidth = tmpwidth;
12338 return 1;
12341 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12342 the immediate is valid, write a constant suitable for using as an operand
12343 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12344 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12347 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12348 rtx *modconst, int *elementwidth)
12350 rtx tmpconst;
12351 int tmpwidth;
12352 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12354 if (retval < 0 || retval > 5)
12355 return 0;
12357 if (modconst)
12358 *modconst = tmpconst;
12360 if (elementwidth)
12361 *elementwidth = tmpwidth;
12363 return 1;
12366 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12367 the immediate is valid, write a constant suitable for using as an operand
12368 to VSHR/VSHL to *MODCONST and the corresponding element width to
12369 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12370 because they have different limitations. */
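/* Example of the asymmetry (added for illustration): for a V8QImode
   shift-count vector the element size is 8 bits, so a valid VSHL
   immediate lies in 0..7 while a valid VSHR immediate lies in 1..8;
   the checks against MAXSHIFT below implement exactly those ranges.  */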
12373 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12374 rtx *modconst, int *elementwidth,
12375 bool isleftshift)
12377 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12378 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12379 unsigned HOST_WIDE_INT last_elt = 0;
12380 unsigned HOST_WIDE_INT maxshift;
12382 /* Split vector constant out into a byte vector. */
12383 for (i = 0; i < n_elts; i++)
12385 rtx el = CONST_VECTOR_ELT (op, i);
12386 unsigned HOST_WIDE_INT elpart;
12388 if (CONST_INT_P (el))
12389 elpart = INTVAL (el);
12390 else if (CONST_DOUBLE_P (el))
12391 return 0;
12392 else
12393 gcc_unreachable ();
12395 if (i != 0 && elpart != last_elt)
12396 return 0;
12398 last_elt = elpart;
12401 /* Shift less than element size. */
12402 maxshift = innersize * 8;
12404 if (isleftshift)
12406 /* Left shift immediate value can be from 0 to <size>-1. */
12407 if (last_elt >= maxshift)
12408 return 0;
12410 else
12412 /* Right shift immediate value can be from 1 to <size>. */
12413 if (last_elt == 0 || last_elt > maxshift)
12414 return 0;
12417 if (elementwidth)
12418 *elementwidth = innersize * 8;
12420 if (modconst)
12421 *modconst = CONST_VECTOR_ELT (op, 0);
12423 return 1;
12426 /* Return a string suitable for output of Neon immediate logic operation
12427 MNEM. */
12429 char *
12430 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12431 int inverse, int quad)
12433 int width, is_valid;
12434 static char templ[40];
12436 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12438 gcc_assert (is_valid != 0);
12440 if (quad)
12441 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12442 else
12443 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12445 return templ;
12448 /* Return a string suitable for output of Neon immediate shift operation
12449 (VSHR or VSHL) MNEM. */
12451 char *
12452 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12453 machine_mode mode, int quad,
12454 bool isleftshift)
12456 int width, is_valid;
12457 static char templ[40];
12459 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12460 gcc_assert (is_valid != 0);
12462 if (quad)
12463 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12464 else
12465 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12467 return templ;
12470 /* Output a sequence of pairwise operations to implement a reduction.
12471 NOTE: We do "too much work" here, because pairwise operations work on two
12472 registers-worth of operands in one go. Unfortunately we can't exploit those
12473 extra calculations to do the full operation in fewer steps, I don't think.
12474 Although all vector elements of the result but the first are ignored, we
12475 actually calculate the same result in each of the elements. An alternative
12476 such as initially loading a vector with zero to use as each of the second
12477 operands would use up an additional register and take an extra instruction,
12478 for no particular gain. */
12480 void
12481 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12482 rtx (*reduc) (rtx, rtx, rtx))
12484 machine_mode inner = GET_MODE_INNER (mode);
12485 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12486 rtx tmpsum = op1;
12488 for (i = parts / 2; i >= 1; i /= 2)
12490 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12491 emit_insn (reduc (dest, tmpsum, tmpsum));
12492 tmpsum = dest;
12496 /* If VALS is a vector constant that can be loaded into a register
12497 using VDUP, generate instructions to do so and return an RTX to
12498 assign to the register. Otherwise return NULL_RTX. */
12500 static rtx
12501 neon_vdup_constant (rtx vals)
12503 machine_mode mode = GET_MODE (vals);
12504 machine_mode inner_mode = GET_MODE_INNER (mode);
12505 int n_elts = GET_MODE_NUNITS (mode);
12506 bool all_same = true;
12507 rtx x;
12508 int i;
12510 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12511 return NULL_RTX;
12513 for (i = 0; i < n_elts; ++i)
12515 x = XVECEXP (vals, 0, i);
12516 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12517 all_same = false;
12520 if (!all_same)
12521 /* The elements are not all the same. We could handle repeating
12522 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12523 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12524 vdup.i16). */
12525 return NULL_RTX;
12527 /* We can load this constant by using VDUP and a constant in a
12528 single ARM register. This will be cheaper than a vector
12529 load. */
12531 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12532 return gen_rtx_VEC_DUPLICATE (mode, x);
12535 /* Generate code to load VALS, which is a PARALLEL containing only
12536 constants (for vec_init) or CONST_VECTOR, efficiently into a
12537 register. Returns an RTX to copy into the register, or NULL_RTX
12538 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12541 neon_make_constant (rtx vals)
12543 machine_mode mode = GET_MODE (vals);
12544 rtx target;
12545 rtx const_vec = NULL_RTX;
12546 int n_elts = GET_MODE_NUNITS (mode);
12547 int n_const = 0;
12548 int i;
12550 if (GET_CODE (vals) == CONST_VECTOR)
12551 const_vec = vals;
12552 else if (GET_CODE (vals) == PARALLEL)
12554 /* A CONST_VECTOR must contain only CONST_INTs and
12555 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12556 Only store valid constants in a CONST_VECTOR. */
12557 for (i = 0; i < n_elts; ++i)
12559 rtx x = XVECEXP (vals, 0, i);
12560 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12561 n_const++;
12563 if (n_const == n_elts)
12564 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12566 else
12567 gcc_unreachable ();
12569 if (const_vec != NULL
12570 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12571 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12572 return const_vec;
12573 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12574 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12575 pipeline cycle; creating the constant takes one or two ARM
12576 pipeline cycles. */
12577 return target;
12578 else if (const_vec != NULL_RTX)
12579 /* Load from constant pool. On Cortex-A8 this takes two cycles
12580 (for either double or quad vectors). We cannot take advantage
12581 of single-cycle VLD1 because we need a PC-relative addressing
12582 mode. */
12583 return const_vec;
12584 else
12585 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12586 We cannot construct an initializer. */
12587 return NULL_RTX;
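/* Example of the possible outcomes (added for illustration): in V4SImode
   the constant {1, 1, 1, 1} is a legal VMOV immediate and is returned
   as-is; a uniform vector of 0x12345678 is not, but can be loaded with
   VDUP from a core register; {1, 2, 3, 4} falls back to the constant
   pool; and a PARALLEL containing, say, a SYMBOL_REF element gives
   NULL_RTX.  */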
12590 /* Initialize vector TARGET to VALS. */
12592 void
12593 neon_expand_vector_init (rtx target, rtx vals)
12595 machine_mode mode = GET_MODE (target);
12596 machine_mode inner_mode = GET_MODE_INNER (mode);
12597 int n_elts = GET_MODE_NUNITS (mode);
12598 int n_var = 0, one_var = -1;
12599 bool all_same = true;
12600 rtx x, mem;
12601 int i;
12603 for (i = 0; i < n_elts; ++i)
12605 x = XVECEXP (vals, 0, i);
12606 if (!CONSTANT_P (x))
12607 ++n_var, one_var = i;
12609 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12610 all_same = false;
12613 if (n_var == 0)
12615 rtx constant = neon_make_constant (vals);
12616 if (constant != NULL_RTX)
12618 emit_move_insn (target, constant);
12619 return;
12623 /* Splat a single non-constant element if we can. */
12624 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12626 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12627 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12628 return;
12631 /* One field is non-constant. Load constant then overwrite varying
12632 field. This is more efficient than using the stack. */
12633 if (n_var == 1)
12635 rtx copy = copy_rtx (vals);
12636 rtx index = GEN_INT (one_var);
12638 /* Load constant part of vector, substitute neighboring value for
12639 varying element. */
12640 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12641 neon_expand_vector_init (target, copy);
12643 /* Insert variable. */
12644 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12645 switch (mode)
12647 case V8QImode:
12648 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12649 break;
12650 case V16QImode:
12651 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12652 break;
12653 case V4HImode:
12654 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12655 break;
12656 case V8HImode:
12657 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12658 break;
12659 case V2SImode:
12660 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12661 break;
12662 case V4SImode:
12663 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12664 break;
12665 case V2SFmode:
12666 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12667 break;
12668 case V4SFmode:
12669 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12670 break;
12671 case V2DImode:
12672 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12673 break;
12674 default:
12675 gcc_unreachable ();
12677 return;
12680 /* Construct the vector in memory one field at a time
12681 and load the whole vector. */
12682 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12683 for (i = 0; i < n_elts; i++)
12684 emit_move_insn (adjust_address_nv (mem, inner_mode,
12685 i * GET_MODE_SIZE (inner_mode)),
12686 XVECEXP (vals, 0, i));
12687 emit_move_insn (target, mem);
12690 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12691 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12692 reported source locations are bogus. */
12694 static void
12695 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12696 const char *err)
12698 HOST_WIDE_INT lane;
12700 gcc_assert (CONST_INT_P (operand));
12702 lane = INTVAL (operand);
12704 if (lane < low || lane >= high)
12705 error (err);
12708 /* Bounds-check lanes. */
12710 void
12711 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12713 bounds_check (operand, low, high, "lane out of range");
12716 /* Bounds-check constants. */
12718 void
12719 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12721 bounds_check (operand, low, high, "constant out of range");
12724 HOST_WIDE_INT
12725 neon_element_bits (machine_mode mode)
12727 if (mode == DImode)
12728 return GET_MODE_BITSIZE (mode);
12729 else
12730 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12734 /* Predicates for `match_operand' and `match_operator'. */
12736 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12737 WB is true if full writeback address modes are allowed and is false
12738 if limited writeback address modes (POST_INC and PRE_DEC) are
12739 allowed. */
12742 arm_coproc_mem_operand (rtx op, bool wb)
12744 rtx ind;
12746 /* Reject eliminable registers. */
12747 if (! (reload_in_progress || reload_completed || lra_in_progress)
12748 && ( reg_mentioned_p (frame_pointer_rtx, op)
12749 || reg_mentioned_p (arg_pointer_rtx, op)
12750 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12751 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12752 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12753 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12754 return FALSE;
12756 /* Constants are converted into offsets from labels. */
12757 if (!MEM_P (op))
12758 return FALSE;
12760 ind = XEXP (op, 0);
12762 if (reload_completed
12763 && (GET_CODE (ind) == LABEL_REF
12764 || (GET_CODE (ind) == CONST
12765 && GET_CODE (XEXP (ind, 0)) == PLUS
12766 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12767 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12768 return TRUE;
12770 /* Match: (mem (reg)). */
12771 if (REG_P (ind))
12772 return arm_address_register_rtx_p (ind, 0);
12774 /* Auto-increment addressing modes. POST_INC and PRE_DEC are
12775 acceptable in any case (subject to verification by
12776 arm_address_register_rtx_p). We need WB to be true to accept
12777 PRE_INC and POST_DEC. */
12778 if (GET_CODE (ind) == POST_INC
12779 || GET_CODE (ind) == PRE_DEC
12780 || (wb
12781 && (GET_CODE (ind) == PRE_INC
12782 || GET_CODE (ind) == POST_DEC)))
12783 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12785 if (wb
12786 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12787 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12788 && GET_CODE (XEXP (ind, 1)) == PLUS
12789 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12790 ind = XEXP (ind, 1);
12792 /* Match:
12793 (plus (reg)
12794 (const)). */
12795 if (GET_CODE (ind) == PLUS
12796 && REG_P (XEXP (ind, 0))
12797 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12798 && CONST_INT_P (XEXP (ind, 1))
12799 && INTVAL (XEXP (ind, 1)) > -1024
12800 && INTVAL (XEXP (ind, 1)) < 1024
12801 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12802 return TRUE;
12804 return FALSE;
12807 /* Return TRUE if OP is a memory operand which we can load or store a vector
12808 to/from. TYPE is one of the following values:
12809 0 - Vector load/store (vldr)
12810 1 - Core registers (ldm)
12811 2 - Element/structure loads (vld1)
12814 neon_vector_mem_operand (rtx op, int type, bool strict)
12816 rtx ind;
12818 /* Reject eliminable registers. */
12819 if (! (reload_in_progress || reload_completed)
12820 && ( reg_mentioned_p (frame_pointer_rtx, op)
12821 || reg_mentioned_p (arg_pointer_rtx, op)
12822 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12823 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12824 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12825 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12826 return !strict;
12828 /* Constants are converted into offsets from labels. */
12829 if (!MEM_P (op))
12830 return FALSE;
12832 ind = XEXP (op, 0);
12834 if (reload_completed
12835 && (GET_CODE (ind) == LABEL_REF
12836 || (GET_CODE (ind) == CONST
12837 && GET_CODE (XEXP (ind, 0)) == PLUS
12838 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12839 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12840 return TRUE;
12842 /* Match: (mem (reg)). */
12843 if (REG_P (ind))
12844 return arm_address_register_rtx_p (ind, 0);
12846 /* Allow post-increment with Neon registers. */
12847 if ((type != 1 && GET_CODE (ind) == POST_INC)
12848 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12849 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12851 /* Allow post-increment by register for VLDn */
12852 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12853 && GET_CODE (XEXP (ind, 1)) == PLUS
12854 && REG_P (XEXP (XEXP (ind, 1), 1)))
12855 return true;
12857 /* Match:
12858 (plus (reg)
12859 (const)). */
12860 if (type == 0
12861 && GET_CODE (ind) == PLUS
12862 && REG_P (XEXP (ind, 0))
12863 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12864 && CONST_INT_P (XEXP (ind, 1))
12865 && INTVAL (XEXP (ind, 1)) > -1024
12866 /* For quad modes, we restrict the constant offset to be slightly less
12867 than what the instruction format permits. We have no such constraint
12868 on double mode offsets. (This must match arm_legitimate_index_p.) */
12869 && (INTVAL (XEXP (ind, 1))
12870 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12871 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12872 return TRUE;
12874 return FALSE;
12877 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12878 type. */
12880 neon_struct_mem_operand (rtx op)
12882 rtx ind;
12884 /* Reject eliminable registers. */
12885 if (! (reload_in_progress || reload_completed)
12886 && ( reg_mentioned_p (frame_pointer_rtx, op)
12887 || reg_mentioned_p (arg_pointer_rtx, op)
12888 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12889 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12890 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12891 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12892 return FALSE;
12894 /* Constants are converted into offsets from labels. */
12895 if (!MEM_P (op))
12896 return FALSE;
12898 ind = XEXP (op, 0);
12900 if (reload_completed
12901 && (GET_CODE (ind) == LABEL_REF
12902 || (GET_CODE (ind) == CONST
12903 && GET_CODE (XEXP (ind, 0)) == PLUS
12904 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12905 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12906 return TRUE;
12908 /* Match: (mem (reg)). */
12909 if (REG_P (ind))
12910 return arm_address_register_rtx_p (ind, 0);
12912 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12913 if (GET_CODE (ind) == POST_INC
12914 || GET_CODE (ind) == PRE_DEC)
12915 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12917 return FALSE;
12920 /* Return true if X is a register that will be eliminated later on. */
12922 arm_eliminable_register (rtx x)
12924 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12925 || REGNO (x) == ARG_POINTER_REGNUM
12926 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12927 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12930 /* Return GENERAL_REGS if a scratch register required to reload x to/from
12931 coprocessor registers. Otherwise return NO_REGS. */
12933 enum reg_class
12934 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12936 if (mode == HFmode)
12938 if (!TARGET_NEON_FP16)
12939 return GENERAL_REGS;
12940 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12941 return NO_REGS;
12942 return GENERAL_REGS;
12945 /* The neon move patterns handle all legitimate vector and struct
12946 addresses. */
12947 if (TARGET_NEON
12948 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12949 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12950 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12951 || VALID_NEON_STRUCT_MODE (mode)))
12952 return NO_REGS;
12954 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12955 return NO_REGS;
12957 return GENERAL_REGS;
12960 /* Values which must be returned in the most-significant end of the return
12961 register. */
12963 static bool
12964 arm_return_in_msb (const_tree valtype)
12966 return (TARGET_AAPCS_BASED
12967 && BYTES_BIG_ENDIAN
12968 && (AGGREGATE_TYPE_P (valtype)
12969 || TREE_CODE (valtype) == COMPLEX_TYPE
12970 || FIXED_POINT_TYPE_P (valtype)));
12973 /* Return TRUE if X references a SYMBOL_REF. */
12975 symbol_mentioned_p (rtx x)
12977 const char * fmt;
12978 int i;
12980 if (GET_CODE (x) == SYMBOL_REF)
12981 return 1;
12983 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12984 are constant offsets, not symbols. */
12985 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12986 return 0;
12988 fmt = GET_RTX_FORMAT (GET_CODE (x));
12990 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12992 if (fmt[i] == 'E')
12994 int j;
12996 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12997 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12998 return 1;
13000 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13001 return 1;
13004 return 0;
13007 /* Return TRUE if X references a LABEL_REF. */
13009 label_mentioned_p (rtx x)
13011 const char * fmt;
13012 int i;
13014 if (GET_CODE (x) == LABEL_REF)
13015 return 1;
13017 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13018 instruction, but they are constant offsets, not symbols. */
13019 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13020 return 0;
13022 fmt = GET_RTX_FORMAT (GET_CODE (x));
13023 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13025 if (fmt[i] == 'E')
13027 int j;
13029 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13030 if (label_mentioned_p (XVECEXP (x, i, j)))
13031 return 1;
13033 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13034 return 1;
13037 return 0;
13041 tls_mentioned_p (rtx x)
13043 switch (GET_CODE (x))
13045 case CONST:
13046 return tls_mentioned_p (XEXP (x, 0));
13048 case UNSPEC:
13049 if (XINT (x, 1) == UNSPEC_TLS)
13050 return 1;
13052 default:
13053 return 0;
13057 /* Must not copy any rtx that uses a pc-relative address. */
13059 static bool
13060 arm_cannot_copy_insn_p (rtx_insn *insn)
13062 /* The tls call insn cannot be copied, as it is paired with a data
13063 word. */
13064 if (recog_memoized (insn) == CODE_FOR_tlscall)
13065 return true;
13067 subrtx_iterator::array_type array;
13068 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13070 const_rtx x = *iter;
13071 if (GET_CODE (x) == UNSPEC
13072 && (XINT (x, 1) == UNSPEC_PIC_BASE
13073 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13074 return true;
13076 return false;
13079 enum rtx_code
13080 minmax_code (rtx x)
13082 enum rtx_code code = GET_CODE (x);
13084 switch (code)
13086 case SMAX:
13087 return GE;
13088 case SMIN:
13089 return LE;
13090 case UMIN:
13091 return LEU;
13092 case UMAX:
13093 return GEU;
13094 default:
13095 gcc_unreachable ();
13099 /* Match pair of min/max operators that can be implemented via usat/ssat. */
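/* Worked example, derived from the rules implemented below (added for
   illustration): bounds [0, 255] give exact_log2 (256) = 8 with a zero
   low bound, so *MASK = 8 and *SIGNED_SAT = false (a usat #8 range).
   Bounds [-256, 255] satisfy lo == -hi - 1, so *MASK = 9 and
   *SIGNED_SAT = true (an ssat #9 range).  Bounds such as [0, 100] fail
   the power-of-two-minus-one test and the function returns false.  */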
13101 bool
13102 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13103 int *mask, bool *signed_sat)
13105 /* The high bound must be a power of two minus one. */
13106 int log = exact_log2 (INTVAL (hi_bound) + 1);
13107 if (log == -1)
13108 return false;
13110 /* The low bound is either zero (for usat) or one less than the
13111 negation of the high bound (for ssat). */
13112 if (INTVAL (lo_bound) == 0)
13114 if (mask)
13115 *mask = log;
13116 if (signed_sat)
13117 *signed_sat = false;
13119 return true;
13122 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13124 if (mask)
13125 *mask = log + 1;
13126 if (signed_sat)
13127 *signed_sat = true;
13129 return true;
13132 return false;
13135 /* Return 1 if memory locations are adjacent. */
13137 adjacent_mem_locations (rtx a, rtx b)
13139 /* We don't guarantee to preserve the order of these memory refs. */
13140 if (volatile_refs_p (a) || volatile_refs_p (b))
13141 return 0;
13143 if ((REG_P (XEXP (a, 0))
13144 || (GET_CODE (XEXP (a, 0)) == PLUS
13145 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13146 && (REG_P (XEXP (b, 0))
13147 || (GET_CODE (XEXP (b, 0)) == PLUS
13148 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13150 HOST_WIDE_INT val0 = 0, val1 = 0;
13151 rtx reg0, reg1;
13152 int val_diff;
13154 if (GET_CODE (XEXP (a, 0)) == PLUS)
13156 reg0 = XEXP (XEXP (a, 0), 0);
13157 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13159 else
13160 reg0 = XEXP (a, 0);
13162 if (GET_CODE (XEXP (b, 0)) == PLUS)
13164 reg1 = XEXP (XEXP (b, 0), 0);
13165 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13167 else
13168 reg1 = XEXP (b, 0);
13170 /* Don't accept any offset that will require multiple
13171 instructions to handle, since this would cause the
13172 arith_adjacentmem pattern to output an overlong sequence. */
13173 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13174 return 0;
13176 /* Don't allow an eliminable register: register elimination can make
13177 the offset too large. */
13178 if (arm_eliminable_register (reg0))
13179 return 0;
13181 val_diff = val1 - val0;
13183 if (arm_ld_sched)
13185 /* If the target has load delay slots, then there's no benefit
13186 to using an ldm instruction unless the offset is zero and
13187 we are optimizing for size. */
13188 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13189 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13190 && (val_diff == 4 || val_diff == -4));
13193 return ((REGNO (reg0) == REGNO (reg1))
13194 && (val_diff == 4 || val_diff == -4));
13197 return 0;
13200 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13201 for load operations, false for store operations. CONSECUTIVE is true
13202 if the register numbers in the operation must be consecutive in the register
13203 bank. RETURN_PC is true if the value is to be loaded into the PC.
13204 The pattern we are trying to match for load is:
13205 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13206 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13207 :
13208 :
13209 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13211 where
13212 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13213 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13214 3. If consecutive is TRUE, then for kth register being loaded,
13215 REGNO (R_dk) = REGNO (R_d0) + k.
13216 The pattern for store is similar. */
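/* For illustration (not from the original sources), a two-register ldmia
   of r4 and r5 from the address in r0 would be presented to this
   predicate roughly as

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))])

   i.e. ascending register numbers and offsets increasing by the register
   size, exactly as rules 1-3 above require.  */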
13217 bool
13218 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13219 bool consecutive, bool return_pc)
13221 HOST_WIDE_INT count = XVECLEN (op, 0);
13222 rtx reg, mem, addr;
13223 unsigned regno;
13224 unsigned first_regno;
13225 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13226 rtx elt;
13227 bool addr_reg_in_reglist = false;
13228 bool update = false;
13229 int reg_increment;
13230 int offset_adj;
13231 int regs_per_val;
13233 /* If not in SImode, then registers must be consecutive
13234 (e.g., VLDM instructions for DFmode). */
13235 gcc_assert ((mode == SImode) || consecutive);
13236 /* Setting return_pc for stores is illegal. */
13237 gcc_assert (!return_pc || load);
13239 /* Set up the increments and the regs per val based on the mode. */
13240 reg_increment = GET_MODE_SIZE (mode);
13241 regs_per_val = reg_increment / 4;
13242 offset_adj = return_pc ? 1 : 0;
13244 if (count <= 1
13245 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13246 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13247 return false;
13249 /* Check if this is a write-back. */
13250 elt = XVECEXP (op, 0, offset_adj);
13251 if (GET_CODE (SET_SRC (elt)) == PLUS)
13253 i++;
13254 base = 1;
13255 update = true;
13257 /* The offset adjustment must be the number of registers being
13258 popped times the size of a single register. */
13259 if (!REG_P (SET_DEST (elt))
13260 || !REG_P (XEXP (SET_SRC (elt), 0))
13261 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13262 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13263 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13264 ((count - 1 - offset_adj) * reg_increment))
13265 return false;
13268 i = i + offset_adj;
13269 base = base + offset_adj;
13270 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13271 success depends on the type: VLDM can do just one reg,
13272 LDM must do at least two. */
13273 if ((count <= i) && (mode == SImode))
13274 return false;
13276 elt = XVECEXP (op, 0, i - 1);
13277 if (GET_CODE (elt) != SET)
13278 return false;
13280 if (load)
13282 reg = SET_DEST (elt);
13283 mem = SET_SRC (elt);
13285 else
13287 reg = SET_SRC (elt);
13288 mem = SET_DEST (elt);
13291 if (!REG_P (reg) || !MEM_P (mem))
13292 return false;
13294 regno = REGNO (reg);
13295 first_regno = regno;
13296 addr = XEXP (mem, 0);
13297 if (GET_CODE (addr) == PLUS)
13299 if (!CONST_INT_P (XEXP (addr, 1)))
13300 return false;
13302 offset = INTVAL (XEXP (addr, 1));
13303 addr = XEXP (addr, 0);
13306 if (!REG_P (addr))
13307 return false;
13309 /* Don't allow SP to be loaded unless it is also the base register. It
13310 guarantees that SP is reset correctly when an LDM instruction
13311 is interrupted. Otherwise, we might end up with a corrupt stack. */
13312 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13313 return false;
13315 for (; i < count; i++)
13317 elt = XVECEXP (op, 0, i);
13318 if (GET_CODE (elt) != SET)
13319 return false;
13321 if (load)
13323 reg = SET_DEST (elt);
13324 mem = SET_SRC (elt);
13326 else
13328 reg = SET_SRC (elt);
13329 mem = SET_DEST (elt);
13332 if (!REG_P (reg)
13333 || GET_MODE (reg) != mode
13334 || REGNO (reg) <= regno
13335 || (consecutive
13336 && (REGNO (reg) !=
13337 (unsigned int) (first_regno + regs_per_val * (i - base))))
13338 /* Don't allow SP to be loaded unless it is also the base register. It
13339 guarantees that SP is reset correctly when an LDM instruction
13340 is interrupted. Otherwise, we might end up with a corrupt stack. */
13341 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13342 || !MEM_P (mem)
13343 || GET_MODE (mem) != mode
13344 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13345 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13346 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13347 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13348 offset + (i - base) * reg_increment))
13349 && (!REG_P (XEXP (mem, 0))
13350 || offset + (i - base) * reg_increment != 0)))
13351 return false;
13353 regno = REGNO (reg);
13354 if (regno == REGNO (addr))
13355 addr_reg_in_reglist = true;
13358 if (load)
13360 if (update && addr_reg_in_reglist)
13361 return false;
13363 /* For Thumb-1, the address register is always modified - either by write-back
13364 or by explicit load. If the pattern does not describe an update,
13365 then the address register must be in the list of loaded registers. */
13366 if (TARGET_THUMB1)
13367 return update || addr_reg_in_reglist;
13370 return true;
13373 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13374 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13375 instruction. ADD_OFFSET is nonzero if the base address register needs
13376 to be modified with an add instruction before we can use it. */
13378 static bool
13379 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13380 int nops, HOST_WIDE_INT add_offset)
13382 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13383 if the offset isn't small enough. The reason 2 ldrs are faster
13384 is because these ARMs are able to do more than one cache access
13385 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13386 whilst the ARM8 has a double bandwidth cache. This means that
13387 these cores can do both an instruction fetch and a data fetch in
13388 a single cycle, so the trick of calculating the address into a
13389 scratch register (one of the result regs) and then doing a load
13390 multiple actually becomes slower (and no smaller in code size).
13391 That is the transformation
13393 ldr rd1, [rbase + offset]
13394 ldr rd2, [rbase + offset + 4]
13396 to
13398 add rd1, rbase, offset
13399 ldmia rd1, {rd1, rd2}
13401 produces worse code -- '3 cycles + any stalls on rd2' instead of
13402 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13403 access per cycle, the first sequence could never complete in less
13404 than 6 cycles, whereas the ldm sequence would only take 5 and
13405 would make better use of sequential accesses if not hitting the
13406 cache.
13408 We cheat here and test 'arm_ld_sched' which we currently know to
13409 only be true for the ARM8, ARM9 and StrongARM. If this ever
13410 changes, then the test below needs to be reworked. */
13411 if (nops == 2 && arm_ld_sched && add_offset != 0)
13412 return false;
13414 /* XScale has load-store double instructions, but they have stricter
13415 alignment requirements than load-store multiple, so we cannot
13416 use them.
13418 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13419 the pipeline until completion.
13421 NREGS CYCLES
13422 1 3
13423 2 4
13424 3 5
13425 4 6
13427 An ldr instruction takes 1-3 cycles, but does not block the
13428 pipeline.
13430 NREGS CYCLES
13431 1 1-3
13432 2 2-6
13433 3 3-9
13434 4 4-12
13436 Best case ldr will always win. However, the more ldr instructions
13437 we issue, the less likely we are to be able to schedule them well.
13438 Using ldr instructions also increases code size.
13440 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13441 for counts of 3 or 4 regs. */
13442 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13443 return false;
13444 return true;
13447 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13448 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13449 an array ORDER which describes the sequence to use when accessing the
13450 offsets that produces an ascending order. In this sequence, each
13451 offset must be larger by exactly 4 than the previous one. ORDER[0]
13452 must have been filled in with the lowest offset by the caller.
13453 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13454 we use to verify that ORDER produces an ascending order of registers.
13455 Return true if it was possible to construct such an order, false if
13456 not. */
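/* Small example (added for clarity, not in the original): with NOPS == 3,
   UNSORTED_OFFSETS == {8, 0, 4} and ORDER[0] == 1 (the index of offset 0),
   the loop below finds offset 4 at index 2 and then offset 8 at index 0,
   giving ORDER == {1, 2, 0}.  A duplicated offset or a gap other than 4
   makes the function return false.  */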
13458 static bool
13459 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13460 int *unsorted_regs)
13462 int i;
13463 for (i = 1; i < nops; i++)
13465 int j;
13467 order[i] = order[i - 1];
13468 for (j = 0; j < nops; j++)
13469 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13471 /* We must find exactly one offset that is higher than the
13472 previous one by 4. */
13473 if (order[i] != order[i - 1])
13474 return false;
13475 order[i] = j;
13477 if (order[i] == order[i - 1])
13478 return false;
13479 /* The register numbers must be ascending. */
13480 if (unsorted_regs != NULL
13481 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13482 return false;
13484 return true;
13487 /* Used to determine in a peephole whether a sequence of load
13488 instructions can be changed into a load-multiple instruction.
13489 NOPS is the number of separate load instructions we are examining. The
13490 first NOPS entries in OPERANDS are the destination registers, the
13491 next NOPS entries are memory operands. If this function is
13492 successful, *BASE is set to the common base register of the memory
13493 accesses; *LOAD_OFFSET is set to the first memory location's offset
13494 from that base register.
13495 REGS is an array filled in with the destination register numbers.
13496 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13497 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13498 the sequence of registers in REGS matches the loads from ascending memory
13499 locations, and the function verifies that the register numbers are
13500 themselves ascending. If CHECK_REGS is false, the register numbers
13501 are stored in the order they are found in the operands. */
13502 static int
13503 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13504 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13506 int unsorted_regs[MAX_LDM_STM_OPS];
13507 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13508 int order[MAX_LDM_STM_OPS];
13509 rtx base_reg_rtx = NULL;
13510 int base_reg = -1;
13511 int i, ldm_case;
13513 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13514 easily extended if required. */
13515 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13517 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13519 /* Loop over the operands and check that the memory references are
13520 suitable (i.e. immediate offsets from the same base register). At
13521 the same time, extract the target register, and the memory
13522 offsets. */
13523 for (i = 0; i < nops; i++)
13525 rtx reg;
13526 rtx offset;
13528 /* Convert a subreg of a mem into the mem itself. */
13529 if (GET_CODE (operands[nops + i]) == SUBREG)
13530 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13532 gcc_assert (MEM_P (operands[nops + i]));
13534 /* Don't reorder volatile memory references; it doesn't seem worth
13535 looking for the case where the order is ok anyway. */
13536 if (MEM_VOLATILE_P (operands[nops + i]))
13537 return 0;
13539 offset = const0_rtx;
13541 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13542 || (GET_CODE (reg) == SUBREG
13543 && REG_P (reg = SUBREG_REG (reg))))
13544 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13545 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13546 || (GET_CODE (reg) == SUBREG
13547 && REG_P (reg = SUBREG_REG (reg))))
13548 && (CONST_INT_P (offset
13549 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13551 if (i == 0)
13553 base_reg = REGNO (reg);
13554 base_reg_rtx = reg;
13555 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13556 return 0;
13558 else if (base_reg != (int) REGNO (reg))
13559 /* Not addressed from the same base register. */
13560 return 0;
13562 unsorted_regs[i] = (REG_P (operands[i])
13563 ? REGNO (operands[i])
13564 : REGNO (SUBREG_REG (operands[i])));
13566 /* If it isn't an integer register, or if it overwrites the
13567 base register but isn't the last insn in the list, then
13568 we can't do this. */
13569 if (unsorted_regs[i] < 0
13570 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13571 || unsorted_regs[i] > 14
13572 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13573 return 0;
13575 /* Don't allow SP to be loaded unless it is also the base
13576 register. It guarantees that SP is reset correctly when
13577 an LDM instruction is interrupted. Otherwise, we might
13578 end up with a corrupt stack. */
13579 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13580 return 0;
13582 unsorted_offsets[i] = INTVAL (offset);
13583 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13584 order[0] = i;
13586 else
13587 /* Not a suitable memory address. */
13588 return 0;
13591 /* All the useful information has now been extracted from the
13592 operands into unsorted_regs and unsorted_offsets; additionally,
13593 order[0] has been set to the lowest offset in the list. Sort
13594 the offsets into order, verifying that they are adjacent, and
13595 check that the register numbers are ascending. */
13596 if (!compute_offset_order (nops, unsorted_offsets, order,
13597 check_regs ? unsorted_regs : NULL))
13598 return 0;
13600 if (saved_order)
13601 memcpy (saved_order, order, sizeof order);
13603 if (base)
13605 *base = base_reg;
13607 for (i = 0; i < nops; i++)
13608 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13610 *load_offset = unsorted_offsets[order[0]];
13613 if (TARGET_THUMB1
13614 && !peep2_reg_dead_p (nops, base_reg_rtx))
13615 return 0;
13617 if (unsorted_offsets[order[0]] == 0)
13618 ldm_case = 1; /* ldmia */
13619 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13620 ldm_case = 2; /* ldmib */
13621 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13622 ldm_case = 3; /* ldmda */
13623 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13624 ldm_case = 4; /* ldmdb */
13625 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13626 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13627 ldm_case = 5;
13628 else
13629 return 0;
13631 if (!multiple_operation_profitable_p (false, nops,
13632 ldm_case == 5
13633 ? unsorted_offsets[order[0]] : 0))
13634 return 0;
13636 return ldm_case;
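/* The nonzero return value encodes the addressing mode implied by the
   offsets found above: 1 -> ldmia (lowest offset 0), 2 -> ldmib (lowest
   offset 4, ARM only), 3 -> ldmda (highest offset 0, ARM only), 4 -> ldmdb
   (highest offset -4), and 5 -> none of the above, but the lowest offset is
   a valid immediate, so the base must first be adjusted by a separate add.  */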
13639 /* Used to determine in a peephole whether a sequence of store instructions can
13640 be changed into a store-multiple instruction.
13641 NOPS is the number of separate store instructions we are examining.
13642 NOPS_TOTAL is the total number of instructions recognized by the peephole
13643 pattern.
13644 The first NOPS entries in OPERANDS are the source registers, the next
13645 NOPS entries are memory operands. If this function is successful, *BASE is
13646 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13647 to the first memory location's offset from that base register. REGS is an
13648 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13649 likewise filled with the corresponding rtx's.
13650    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13651 numbers to an ascending order of stores.
13652 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13653 from ascending memory locations, and the function verifies that the register
13654 numbers are themselves ascending. If CHECK_REGS is false, the register
13655 numbers are stored in the order they are found in the operands. */
13656 static int
13657 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13658 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13659 HOST_WIDE_INT *load_offset, bool check_regs)
13661 int unsorted_regs[MAX_LDM_STM_OPS];
13662 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13663 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13664 int order[MAX_LDM_STM_OPS];
13665 int base_reg = -1;
13666 rtx base_reg_rtx = NULL;
13667 int i, stm_case;
13669   /* Write-back of the base register is currently only supported for Thumb-1.  */
13670 int base_writeback = TARGET_THUMB1;
13672 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13673 easily extended if required. */
13674 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13676 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13678 /* Loop over the operands and check that the memory references are
13679 suitable (i.e. immediate offsets from the same base register). At
13680 the same time, extract the target register, and the memory
13681 offsets. */
13682 for (i = 0; i < nops; i++)
13684 rtx reg;
13685 rtx offset;
13687 /* Convert a subreg of a mem into the mem itself. */
13688 if (GET_CODE (operands[nops + i]) == SUBREG)
13689 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13691 gcc_assert (MEM_P (operands[nops + i]));
13693 /* Don't reorder volatile memory references; it doesn't seem worth
13694 looking for the case where the order is ok anyway. */
13695 if (MEM_VOLATILE_P (operands[nops + i]))
13696 return 0;
13698 offset = const0_rtx;
13700 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13701 || (GET_CODE (reg) == SUBREG
13702 && REG_P (reg = SUBREG_REG (reg))))
13703 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13704 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13705 || (GET_CODE (reg) == SUBREG
13706 && REG_P (reg = SUBREG_REG (reg))))
13707 && (CONST_INT_P (offset
13708 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13710 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13711 ? operands[i] : SUBREG_REG (operands[i]));
13712 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13714 if (i == 0)
13716 base_reg = REGNO (reg);
13717 base_reg_rtx = reg;
13718 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13719 return 0;
13721 else if (base_reg != (int) REGNO (reg))
13722 /* Not addressed from the same base register. */
13723 return 0;
13725 /* If it isn't an integer register, then we can't do this. */
13726 if (unsorted_regs[i] < 0
13727 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13728 /* The effects are unpredictable if the base register is
13729 both updated and stored. */
13730 || (base_writeback && unsorted_regs[i] == base_reg)
13731 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13732 || unsorted_regs[i] > 14)
13733 return 0;
13735 unsorted_offsets[i] = INTVAL (offset);
13736 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13737 order[0] = i;
13739 else
13740 /* Not a suitable memory address. */
13741 return 0;
13744 /* All the useful information has now been extracted from the
13745 operands into unsorted_regs and unsorted_offsets; additionally,
13746 order[0] has been set to the lowest offset in the list. Sort
13747 the offsets into order, verifying that they are adjacent, and
13748 check that the register numbers are ascending. */
13749 if (!compute_offset_order (nops, unsorted_offsets, order,
13750 check_regs ? unsorted_regs : NULL))
13751 return 0;
13753 if (saved_order)
13754 memcpy (saved_order, order, sizeof order);
13756 if (base)
13758 *base = base_reg;
13760 for (i = 0; i < nops; i++)
13762 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13763 if (reg_rtxs)
13764 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13767 *load_offset = unsorted_offsets[order[0]];
13770 if (TARGET_THUMB1
13771 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13772 return 0;
13774 if (unsorted_offsets[order[0]] == 0)
13775 stm_case = 1; /* stmia */
13776 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13777 stm_case = 2; /* stmib */
13778 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13779 stm_case = 3; /* stmda */
13780 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13781 stm_case = 4; /* stmdb */
13782 else
13783 return 0;
13785 if (!multiple_operation_profitable_p (false, nops, 0))
13786 return 0;
13788 return stm_case;
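/* As with the load case, the nonzero return value encodes the addressing
   mode: 1 -> stmia, 2 -> stmib (ARM only), 3 -> stmda (ARM only) and
   4 -> stmdb.  */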
13791 /* Routines for use in generating RTL. */
13793 /* Generate a load-multiple instruction. COUNT is the number of loads in
13794 the instruction; REGS and MEMS are arrays containing the operands.
13795 BASEREG is the base register to be used in addressing the memory operands.
13796    WBACK_OFFSET, if nonzero, is the amount by which the base register should
13797    be updated.  */
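/* Illustrative sketch of the result (register numbers arbitrary): with
   COUNT == 2, REGS == { 0, 1 } and WBACK_OFFSET == 8 the profitable case
   builds a PARALLEL of the form
       (parallel [(set (reg base) (plus (reg base) (const_int 8)))
                  (set (reg:SI 0) (mem ...))
                  (set (reg:SI 1) (mem ...))])
   while the unprofitable case falls back to a sequence of single moves.  */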
13799 static rtx
13800 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13801 HOST_WIDE_INT wback_offset)
13803 int i = 0, j;
13804 rtx result;
13806 if (!multiple_operation_profitable_p (false, count, 0))
13808 rtx seq;
13810 start_sequence ();
13812 for (i = 0; i < count; i++)
13813 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13815 if (wback_offset != 0)
13816 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13818 seq = get_insns ();
13819 end_sequence ();
13821 return seq;
13824 result = gen_rtx_PARALLEL (VOIDmode,
13825 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13826 if (wback_offset != 0)
13828 XVECEXP (result, 0, 0)
13829 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13830 i = 1;
13831 count++;
13834 for (j = 0; i < count; i++, j++)
13835 XVECEXP (result, 0, i)
13836 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13838 return result;
13841 /* Generate a store-multiple instruction. COUNT is the number of stores in
13842 the instruction; REGS and MEMS are arrays containing the operands.
13843 BASEREG is the base register to be used in addressing the memory operands.
13844    WBACK_OFFSET, if nonzero, is the amount by which the base register should
13845    be updated.  */
13847 static rtx
13848 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13849 HOST_WIDE_INT wback_offset)
13851 int i = 0, j;
13852 rtx result;
13854 if (GET_CODE (basereg) == PLUS)
13855 basereg = XEXP (basereg, 0);
13857 if (!multiple_operation_profitable_p (false, count, 0))
13859 rtx seq;
13861 start_sequence ();
13863 for (i = 0; i < count; i++)
13864 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13866 if (wback_offset != 0)
13867 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13869 seq = get_insns ();
13870 end_sequence ();
13872 return seq;
13875 result = gen_rtx_PARALLEL (VOIDmode,
13876 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13877 if (wback_offset != 0)
13879 XVECEXP (result, 0, 0)
13880 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13881 i = 1;
13882 count++;
13885 for (j = 0; i < count; i++, j++)
13886 XVECEXP (result, 0, i)
13887 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13889 return result;
13892 /* Generate either a load-multiple or a store-multiple instruction. This
13893 function can be used in situations where we can start with a single MEM
13894 rtx and adjust its address upwards.
13895 COUNT is the number of operations in the instruction, not counting a
13896 possible update of the base register. REGS is an array containing the
13897 register operands.
13898 BASEREG is the base register to be used in addressing the memory operands,
13899 which are constructed from BASEMEM.
13900 WRITE_BACK specifies whether the generated instruction should include an
13901 update of the base register.
13902 OFFSETP is used to pass an offset to and from this function; this offset
13903 is not used when constructing the address (instead BASEMEM should have an
13904 appropriate offset in its address), it is used only for setting
13905    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13907 static rtx
13908 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13909 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13911 rtx mems[MAX_LDM_STM_OPS];
13912 HOST_WIDE_INT offset = *offsetp;
13913 int i;
13915 gcc_assert (count <= MAX_LDM_STM_OPS);
13917 if (GET_CODE (basereg) == PLUS)
13918 basereg = XEXP (basereg, 0);
13920 for (i = 0; i < count; i++)
13922 rtx addr = plus_constant (Pmode, basereg, i * 4);
13923 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13924 offset += 4;
13927 if (write_back)
13928 *offsetp = offset;
13930 if (is_load)
13931 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13932 write_back ? 4 * count : 0);
13933 else
13934 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13935 write_back ? 4 * count : 0);
13938 rtx
13939 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13940 rtx basemem, HOST_WIDE_INT *offsetp)
13942 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13943 offsetp);
13946 rtx
13947 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13948 rtx basemem, HOST_WIDE_INT *offsetp)
13950 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13951 offsetp);
13954 /* Called from a peephole2 expander to turn a sequence of loads into an
13955 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13956 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13957    is true if we can reorder the registers, because their subsequent uses
13958    are commutative.
13959 Returns true iff we could generate a new instruction. */
13961 bool
13962 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13964 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13965 rtx mems[MAX_LDM_STM_OPS];
13966 int i, j, base_reg;
13967 rtx base_reg_rtx;
13968 HOST_WIDE_INT offset;
13969 int write_back = FALSE;
13970 int ldm_case;
13971 rtx addr;
13973 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13974 &base_reg, &offset, !sort_regs);
13976 if (ldm_case == 0)
13977 return false;
13979 if (sort_regs)
13980 for (i = 0; i < nops - 1; i++)
13981 for (j = i + 1; j < nops; j++)
13982 if (regs[i] > regs[j])
13984 int t = regs[i];
13985 regs[i] = regs[j];
13986 regs[j] = t;
13988 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13990 if (TARGET_THUMB1)
13992 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13993 gcc_assert (ldm_case == 1 || ldm_case == 5);
13994 write_back = TRUE;
13997 if (ldm_case == 5)
13999 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14000 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14001 offset = 0;
14002 if (!TARGET_THUMB1)
14004 base_reg = regs[0];
14005 base_reg_rtx = newbase;
14009 for (i = 0; i < nops; i++)
14011 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14012 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14013 SImode, addr, 0);
14015 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14016 write_back ? offset + i * 4 : 0));
14017 return true;
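/* Note the LDM_CASE == 5 path above: when the common offset cannot be
   encoded directly in an ldm, a separate add of the offset is emitted first
   (into the base register on Thumb-1, into regs[0] otherwise) and the loads
   are then generated with a zero offset from the new base.  */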
14020 /* Called from a peephole2 expander to turn a sequence of stores into an
14021 STM instruction. OPERANDS are the operands found by the peephole matcher;
14022 NOPS indicates how many separate stores we are trying to combine.
14023 Returns true iff we could generate a new instruction. */
14025 bool
14026 gen_stm_seq (rtx *operands, int nops)
14028 int i;
14029 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14030 rtx mems[MAX_LDM_STM_OPS];
14031 int base_reg;
14032 rtx base_reg_rtx;
14033 HOST_WIDE_INT offset;
14034 int write_back = FALSE;
14035 int stm_case;
14036 rtx addr;
14037 bool base_reg_dies;
14039 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14040 mem_order, &base_reg, &offset, true);
14042 if (stm_case == 0)
14043 return false;
14045 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14047 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14048 if (TARGET_THUMB1)
14050 gcc_assert (base_reg_dies);
14051 write_back = TRUE;
14054 if (stm_case == 5)
14056 gcc_assert (base_reg_dies);
14057 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14058 offset = 0;
14061 addr = plus_constant (Pmode, base_reg_rtx, offset);
14063 for (i = 0; i < nops; i++)
14065 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14066 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14067 SImode, addr, 0);
14069 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14070 write_back ? offset + i * 4 : 0));
14071 return true;
14074 /* Called from a peephole2 expander to turn a sequence of stores that are
14075 preceded by constant loads into an STM instruction. OPERANDS are the
14076 operands found by the peephole matcher; NOPS indicates how many
14077 separate stores we are trying to combine; there are 2 * NOPS
14078 instructions in the peephole.
14079 Returns true iff we could generate a new instruction. */
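/* Illustrative example: if both matched stores use the same source register
   (say r0), each preceded by a constant load into it, a free register is
   allocated for one of the values, the constants are re-emitted as moves in
   memory order, and the two stores collapse into a single stm.  */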
14081 bool
14082 gen_const_stm_seq (rtx *operands, int nops)
14084 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14085 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14086 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14087 rtx mems[MAX_LDM_STM_OPS];
14088 int base_reg;
14089 rtx base_reg_rtx;
14090 HOST_WIDE_INT offset;
14091 int write_back = FALSE;
14092 int stm_case;
14093 rtx addr;
14094 bool base_reg_dies;
14095 int i, j;
14096 HARD_REG_SET allocated;
14098 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14099 mem_order, &base_reg, &offset, false);
14101 if (stm_case == 0)
14102 return false;
14104 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14106 /* If the same register is used more than once, try to find a free
14107 register. */
14108 CLEAR_HARD_REG_SET (allocated);
14109 for (i = 0; i < nops; i++)
14111 for (j = i + 1; j < nops; j++)
14112 if (regs[i] == regs[j])
14114 rtx t = peep2_find_free_register (0, nops * 2,
14115 TARGET_THUMB1 ? "l" : "r",
14116 SImode, &allocated);
14117 if (t == NULL_RTX)
14118 return false;
14119 reg_rtxs[i] = t;
14120 regs[i] = REGNO (t);
14124 /* Compute an ordering that maps the register numbers to an ascending
14125 sequence. */
14126 reg_order[0] = 0;
14127 for (i = 0; i < nops; i++)
14128 if (regs[i] < regs[reg_order[0]])
14129 reg_order[0] = i;
14131 for (i = 1; i < nops; i++)
14133 int this_order = reg_order[i - 1];
14134 for (j = 0; j < nops; j++)
14135 if (regs[j] > regs[reg_order[i - 1]]
14136 && (this_order == reg_order[i - 1]
14137 || regs[j] < regs[this_order]))
14138 this_order = j;
14139 reg_order[i] = this_order;
14142 /* Ensure that registers that must be live after the instruction end
14143 up with the correct value. */
14144 for (i = 0; i < nops; i++)
14146 int this_order = reg_order[i];
14147 if ((this_order != mem_order[i]
14148 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14149 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14150 return false;
14153 /* Load the constants. */
14154 for (i = 0; i < nops; i++)
14156 rtx op = operands[2 * nops + mem_order[i]];
14157 sorted_regs[i] = regs[reg_order[i]];
14158 emit_move_insn (reg_rtxs[reg_order[i]], op);
14161 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14163 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14164 if (TARGET_THUMB1)
14166 gcc_assert (base_reg_dies);
14167 write_back = TRUE;
14170 if (stm_case == 5)
14172 gcc_assert (base_reg_dies);
14173 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14174 offset = 0;
14177 addr = plus_constant (Pmode, base_reg_rtx, offset);
14179 for (i = 0; i < nops; i++)
14181 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14182 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14183 SImode, addr, 0);
14185 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14186 write_back ? offset + i * 4 : 0));
14187 return true;
14190 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14191 unaligned copies on processors which support unaligned semantics for those
14192 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14193 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14194 An interleave factor of 1 (the minimum) will perform no interleaving.
14195 Load/store multiple are used for aligned addresses where possible. */
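/* For example, with INTERLEAVE_FACTOR == 2 each iteration of the main loop
   below copies an 8-byte block as load, load, store, store (or ldm/stm when
   the corresponding side is word aligned), and any trailing 1-7 bytes are
   then copied with word, halfword and byte operations.  */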
14197 static void
14198 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14199 HOST_WIDE_INT length,
14200 unsigned int interleave_factor)
14202 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14203 int *regnos = XALLOCAVEC (int, interleave_factor);
14204 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14205 HOST_WIDE_INT i, j;
14206 HOST_WIDE_INT remaining = length, words;
14207 rtx halfword_tmp = NULL, byte_tmp = NULL;
14208 rtx dst, src;
14209 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14210 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14211 HOST_WIDE_INT srcoffset, dstoffset;
14212 HOST_WIDE_INT src_autoinc, dst_autoinc;
14213 rtx mem, addr;
14215 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14217 /* Use hard registers if we have aligned source or destination so we can use
14218 load/store multiple with contiguous registers. */
14219 if (dst_aligned || src_aligned)
14220 for (i = 0; i < interleave_factor; i++)
14221 regs[i] = gen_rtx_REG (SImode, i);
14222 else
14223 for (i = 0; i < interleave_factor; i++)
14224 regs[i] = gen_reg_rtx (SImode);
14226 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14227 src = copy_addr_to_reg (XEXP (srcbase, 0));
14229 srcoffset = dstoffset = 0;
14231 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14232 For copying the last bytes we want to subtract this offset again. */
14233 src_autoinc = dst_autoinc = 0;
14235 for (i = 0; i < interleave_factor; i++)
14236 regnos[i] = i;
14238 /* Copy BLOCK_SIZE_BYTES chunks. */
14240 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14242 /* Load words. */
14243 if (src_aligned && interleave_factor > 1)
14245 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14246 TRUE, srcbase, &srcoffset));
14247 src_autoinc += UNITS_PER_WORD * interleave_factor;
14249 else
14251 for (j = 0; j < interleave_factor; j++)
14253 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14254 - src_autoinc));
14255 mem = adjust_automodify_address (srcbase, SImode, addr,
14256 srcoffset + j * UNITS_PER_WORD);
14257 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14259 srcoffset += block_size_bytes;
14262 /* Store words. */
14263 if (dst_aligned && interleave_factor > 1)
14265 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14266 TRUE, dstbase, &dstoffset));
14267 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14269 else
14271 for (j = 0; j < interleave_factor; j++)
14273 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14274 - dst_autoinc));
14275 mem = adjust_automodify_address (dstbase, SImode, addr,
14276 dstoffset + j * UNITS_PER_WORD);
14277 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14279 dstoffset += block_size_bytes;
14282 remaining -= block_size_bytes;
14285 /* Copy any whole words left (note these aren't interleaved with any
14286 subsequent halfword/byte load/stores in the interests of simplicity). */
14288 words = remaining / UNITS_PER_WORD;
14290 gcc_assert (words < interleave_factor);
14292 if (src_aligned && words > 1)
14294 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14295 &srcoffset));
14296 src_autoinc += UNITS_PER_WORD * words;
14298 else
14300 for (j = 0; j < words; j++)
14302 addr = plus_constant (Pmode, src,
14303 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14304 mem = adjust_automodify_address (srcbase, SImode, addr,
14305 srcoffset + j * UNITS_PER_WORD);
14306 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14308 srcoffset += words * UNITS_PER_WORD;
14311 if (dst_aligned && words > 1)
14313 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14314 &dstoffset));
14315 dst_autoinc += words * UNITS_PER_WORD;
14317 else
14319 for (j = 0; j < words; j++)
14321 addr = plus_constant (Pmode, dst,
14322 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14323 mem = adjust_automodify_address (dstbase, SImode, addr,
14324 dstoffset + j * UNITS_PER_WORD);
14325 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14327 dstoffset += words * UNITS_PER_WORD;
14330 remaining -= words * UNITS_PER_WORD;
14332 gcc_assert (remaining < 4);
14334 /* Copy a halfword if necessary. */
14336 if (remaining >= 2)
14338 halfword_tmp = gen_reg_rtx (SImode);
14340 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14341 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14342 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14344 /* Either write out immediately, or delay until we've loaded the last
14345 byte, depending on interleave factor. */
14346 if (interleave_factor == 1)
14348 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14349 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14350 emit_insn (gen_unaligned_storehi (mem,
14351 gen_lowpart (HImode, halfword_tmp)));
14352 halfword_tmp = NULL;
14353 dstoffset += 2;
14356 remaining -= 2;
14357 srcoffset += 2;
14360 gcc_assert (remaining < 2);
14362 /* Copy last byte. */
14364 if ((remaining & 1) != 0)
14366 byte_tmp = gen_reg_rtx (SImode);
14368 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14369 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14370 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14372 if (interleave_factor == 1)
14374 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14375 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14376 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14377 byte_tmp = NULL;
14378 dstoffset++;
14381 remaining--;
14382 srcoffset++;
14385 /* Store last halfword if we haven't done so already. */
14387 if (halfword_tmp)
14389 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14390 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14391 emit_insn (gen_unaligned_storehi (mem,
14392 gen_lowpart (HImode, halfword_tmp)));
14393 dstoffset += 2;
14396 /* Likewise for last byte. */
14398 if (byte_tmp)
14400 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14401 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14402 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14403 dstoffset++;
14406 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14409 /* From mips_adjust_block_mem:
14411 Helper function for doing a loop-based block operation on memory
14412 reference MEM. Each iteration of the loop will operate on LENGTH
14413 bytes of MEM.
14415 Create a new base register for use within the loop and point it to
14416 the start of MEM. Create a new memory reference that uses this
14417 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14419 static void
14420 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14421 rtx *loop_mem)
14423 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14425 /* Although the new mem does not refer to a known location,
14426 it does keep up to LENGTH bytes of alignment. */
14427 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14428 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14431 /* From mips_block_move_loop:
14433 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14434 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14435 the memory regions do not overlap. */
14437 static void
14438 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14439 unsigned int interleave_factor,
14440 HOST_WIDE_INT bytes_per_iter)
14442 rtx src_reg, dest_reg, final_src, test;
14443 HOST_WIDE_INT leftover;
14445 leftover = length % bytes_per_iter;
14446 length -= leftover;
14448 /* Create registers and memory references for use within the loop. */
14449 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14450 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14452 /* Calculate the value that SRC_REG should have after the last iteration of
14453 the loop. */
14454 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14455 0, 0, OPTAB_WIDEN);
14457 /* Emit the start of the loop. */
14458 rtx_code_label *label = gen_label_rtx ();
14459 emit_label (label);
14461 /* Emit the loop body. */
14462 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14463 interleave_factor);
14465 /* Move on to the next block. */
14466 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14467 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14469 /* Emit the loop condition. */
14470 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14471 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14473 /* Mop up any left-over bytes. */
14474 if (leftover)
14475 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14478 /* Emit a block move when either the source or destination is unaligned (not
14479 aligned to a four-byte boundary). This may need further tuning depending on
14480 core type, optimize_size setting, etc. */
14482 static int
14483 arm_movmemqi_unaligned (rtx *operands)
14485 HOST_WIDE_INT length = INTVAL (operands[2]);
14487 if (optimize_size)
14489 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14490 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14491 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14492 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14493 or dst_aligned though: allow more interleaving in those cases since the
14494 resulting code can be smaller. */
14495 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14496 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14498 if (length > 12)
14499 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14500 interleave_factor, bytes_per_iter);
14501 else
14502 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14503 interleave_factor);
14505 else
14507 /* Note that the loop created by arm_block_move_unaligned_loop may be
14508 subject to loop unrolling, which makes tuning this condition a little
14509 redundant. */
14510 if (length > 32)
14511 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14512 else
14513 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14516 return 1;
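/* Worked example: an unaligned 40-byte copy when not optimizing for size
   takes the length > 32 path; the loop moves 16 bytes per iteration
   (interleave factor 4) for 32 bytes, and the remaining 8 bytes are copied
   straight-line.  */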
14519 int
14520 arm_gen_movmemqi (rtx *operands)
14522 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14523 HOST_WIDE_INT srcoffset, dstoffset;
14524 int i;
14525 rtx src, dst, srcbase, dstbase;
14526 rtx part_bytes_reg = NULL;
14527 rtx mem;
14529 if (!CONST_INT_P (operands[2])
14530 || !CONST_INT_P (operands[3])
14531 || INTVAL (operands[2]) > 64)
14532 return 0;
14534 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14535 return arm_movmemqi_unaligned (operands);
14537 if (INTVAL (operands[3]) & 3)
14538 return 0;
14540 dstbase = operands[0];
14541 srcbase = operands[1];
14543 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14544 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14546 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14547 out_words_to_go = INTVAL (operands[2]) / 4;
14548 last_bytes = INTVAL (operands[2]) & 3;
14549 dstoffset = srcoffset = 0;
14551 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14552 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14554 for (i = 0; in_words_to_go >= 2; i+=4)
14556 if (in_words_to_go > 4)
14557 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14558 TRUE, srcbase, &srcoffset));
14559 else
14560 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14561 src, FALSE, srcbase,
14562 &srcoffset));
14564 if (out_words_to_go)
14566 if (out_words_to_go > 4)
14567 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14568 TRUE, dstbase, &dstoffset));
14569 else if (out_words_to_go != 1)
14570 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14571 out_words_to_go, dst,
14572 (last_bytes == 0
14573 ? FALSE : TRUE),
14574 dstbase, &dstoffset));
14575 else
14577 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14578 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14579 if (last_bytes != 0)
14581 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14582 dstoffset += 4;
14587 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14588 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14591 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14592 if (out_words_to_go)
14594 rtx sreg;
14596 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14597 sreg = copy_to_reg (mem);
14599 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14600 emit_move_insn (mem, sreg);
14601 in_words_to_go--;
14603 gcc_assert (!in_words_to_go); /* Sanity check */
14606 if (in_words_to_go)
14608 gcc_assert (in_words_to_go > 0);
14610 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14611 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14614 gcc_assert (!last_bytes || part_bytes_reg);
14616 if (BYTES_BIG_ENDIAN && last_bytes)
14618 rtx tmp = gen_reg_rtx (SImode);
14620 /* The bytes we want are in the top end of the word. */
14621 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14622 GEN_INT (8 * (4 - last_bytes))));
14623 part_bytes_reg = tmp;
14625 while (last_bytes)
14627 mem = adjust_automodify_address (dstbase, QImode,
14628 plus_constant (Pmode, dst,
14629 last_bytes - 1),
14630 dstoffset + last_bytes - 1);
14631 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14633 if (--last_bytes)
14635 tmp = gen_reg_rtx (SImode);
14636 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14637 part_bytes_reg = tmp;
14642 else
14644 if (last_bytes > 1)
14646 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14647 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14648 last_bytes -= 2;
14649 if (last_bytes)
14651 rtx tmp = gen_reg_rtx (SImode);
14652 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14653 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14654 part_bytes_reg = tmp;
14655 dstoffset += 2;
14659 if (last_bytes)
14661 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14662 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14666 return 1;
14669 /* Helper for gen_movmem_ldrd_strd.  Return a new memory reference like MEM
14670    but with its address advanced by the size of MEM's mode.  */
14671 inline static rtx
14672 next_consecutive_mem (rtx mem)
14674 machine_mode mode = GET_MODE (mem);
14675 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14676 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14678 return adjust_automodify_address (mem, mode, addr, offset);
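/* E.g. given a DImode reference to [r0], this returns a DImode reference to
   [r0, #8], with the MEM_OFFSET attribute advanced by the same 8 bytes.  */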
14681 /* Copy using LDRD/STRD instructions whenever possible.
14682 Returns true upon success. */
14683 bool
14684 gen_movmem_ldrd_strd (rtx *operands)
14686 unsigned HOST_WIDE_INT len;
14687 HOST_WIDE_INT align;
14688 rtx src, dst, base;
14689 rtx reg0;
14690 bool src_aligned, dst_aligned;
14691 bool src_volatile, dst_volatile;
14693 gcc_assert (CONST_INT_P (operands[2]));
14694 gcc_assert (CONST_INT_P (operands[3]));
14696 len = UINTVAL (operands[2]);
14697 if (len > 64)
14698 return false;
14700 /* Maximum alignment we can assume for both src and dst buffers. */
14701 align = INTVAL (operands[3]);
14703 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14704 return false;
14706 /* Place src and dst addresses in registers
14707 and update the corresponding mem rtx. */
14708 dst = operands[0];
14709 dst_volatile = MEM_VOLATILE_P (dst);
14710 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14711 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14712 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14714 src = operands[1];
14715 src_volatile = MEM_VOLATILE_P (src);
14716 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14717 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14718 src = adjust_automodify_address (src, VOIDmode, base, 0);
14720 if (!unaligned_access && !(src_aligned && dst_aligned))
14721 return false;
14723 if (src_volatile || dst_volatile)
14724 return false;
14726 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14727 if (!(dst_aligned || src_aligned))
14728 return arm_gen_movmemqi (operands);
14730 src = adjust_address (src, DImode, 0);
14731 dst = adjust_address (dst, DImode, 0);
14732 while (len >= 8)
14734 len -= 8;
14735 reg0 = gen_reg_rtx (DImode);
14736 if (src_aligned)
14737 emit_move_insn (reg0, src);
14738 else
14739 emit_insn (gen_unaligned_loaddi (reg0, src));
14741 if (dst_aligned)
14742 emit_move_insn (dst, reg0);
14743 else
14744 emit_insn (gen_unaligned_storedi (dst, reg0));
14746 src = next_consecutive_mem (src);
14747 dst = next_consecutive_mem (dst);
14750 gcc_assert (len < 8);
14751 if (len >= 4)
14753       /* At least a word but less than a double-word left to copy.  Copy a word.  */
14754 reg0 = gen_reg_rtx (SImode);
14755 src = adjust_address (src, SImode, 0);
14756 dst = adjust_address (dst, SImode, 0);
14757 if (src_aligned)
14758 emit_move_insn (reg0, src);
14759 else
14760 emit_insn (gen_unaligned_loadsi (reg0, src));
14762 if (dst_aligned)
14763 emit_move_insn (dst, reg0);
14764 else
14765 emit_insn (gen_unaligned_storesi (dst, reg0));
14767 src = next_consecutive_mem (src);
14768 dst = next_consecutive_mem (dst);
14769 len -= 4;
14772 if (len == 0)
14773 return true;
14775 /* Copy the remaining bytes. */
14776 if (len >= 2)
14778 dst = adjust_address (dst, HImode, 0);
14779 src = adjust_address (src, HImode, 0);
14780 reg0 = gen_reg_rtx (SImode);
14781 if (src_aligned)
14782 emit_insn (gen_zero_extendhisi2 (reg0, src));
14783 else
14784 emit_insn (gen_unaligned_loadhiu (reg0, src));
14786 if (dst_aligned)
14787 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14788 else
14789 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14791 src = next_consecutive_mem (src);
14792 dst = next_consecutive_mem (dst);
14793 if (len == 2)
14794 return true;
14797 dst = adjust_address (dst, QImode, 0);
14798 src = adjust_address (src, QImode, 0);
14799 reg0 = gen_reg_rtx (QImode);
14800 emit_move_insn (reg0, src);
14801 emit_move_insn (dst, reg0);
14802 return true;
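/* Illustrative breakdown: a 15-byte copy with suitably aligned buffers is
   emitted above as one doubleword move (LDRD/STRD, or an unaligned
   equivalent), one word move, one halfword move and a final byte move.  */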
14805 /* Select a dominance comparison mode if possible for a test of the general
14806 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14807 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14808 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14809 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14810 In all cases OP will be either EQ or NE, but we don't need to know which
14811 here. If we are unable to support a dominance comparison we return
14812 CC mode. This will then fail to match for the RTL expressions that
14813 generate this call. */
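/* For example, a test of the form ((a == b) && (c == d)) passed in with
   COND_OR == DOM_CC_X_AND_Y selects CC_DEQmode, while ((a < b) || (a <= c))
   with COND_OR == DOM_CC_X_OR_Y selects CC_DLEmode, since any values
   satisfying LT also satisfy LE.  */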
14814 machine_mode
14815 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14817 enum rtx_code cond1, cond2;
14818 int swapped = 0;
14820 /* Currently we will probably get the wrong result if the individual
14821 comparisons are not simple. This also ensures that it is safe to
14822 reverse a comparison if necessary. */
14823 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14824 != CCmode)
14825 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14826 != CCmode))
14827 return CCmode;
14829 /* The if_then_else variant of this tests the second condition if the
14830 first passes, but is true if the first fails. Reverse the first
14831 condition to get a true "inclusive-or" expression. */
14832 if (cond_or == DOM_CC_NX_OR_Y)
14833 cond1 = reverse_condition (cond1);
14835 /* If the comparisons are not equal, and one doesn't dominate the other,
14836 then we can't do this. */
14837 if (cond1 != cond2
14838 && !comparison_dominates_p (cond1, cond2)
14839 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14840 return CCmode;
14842 if (swapped)
14843 std::swap (cond1, cond2);
14845 switch (cond1)
14847 case EQ:
14848 if (cond_or == DOM_CC_X_AND_Y)
14849 return CC_DEQmode;
14851 switch (cond2)
14853 case EQ: return CC_DEQmode;
14854 case LE: return CC_DLEmode;
14855 case LEU: return CC_DLEUmode;
14856 case GE: return CC_DGEmode;
14857 case GEU: return CC_DGEUmode;
14858 default: gcc_unreachable ();
14861 case LT:
14862 if (cond_or == DOM_CC_X_AND_Y)
14863 return CC_DLTmode;
14865 switch (cond2)
14867 case LT:
14868 return CC_DLTmode;
14869 case LE:
14870 return CC_DLEmode;
14871 case NE:
14872 return CC_DNEmode;
14873 default:
14874 gcc_unreachable ();
14877 case GT:
14878 if (cond_or == DOM_CC_X_AND_Y)
14879 return CC_DGTmode;
14881 switch (cond2)
14883 case GT:
14884 return CC_DGTmode;
14885 case GE:
14886 return CC_DGEmode;
14887 case NE:
14888 return CC_DNEmode;
14889 default:
14890 gcc_unreachable ();
14893 case LTU:
14894 if (cond_or == DOM_CC_X_AND_Y)
14895 return CC_DLTUmode;
14897 switch (cond2)
14899 case LTU:
14900 return CC_DLTUmode;
14901 case LEU:
14902 return CC_DLEUmode;
14903 case NE:
14904 return CC_DNEmode;
14905 default:
14906 gcc_unreachable ();
14909 case GTU:
14910 if (cond_or == DOM_CC_X_AND_Y)
14911 return CC_DGTUmode;
14913 switch (cond2)
14915 case GTU:
14916 return CC_DGTUmode;
14917 case GEU:
14918 return CC_DGEUmode;
14919 case NE:
14920 return CC_DNEmode;
14921 default:
14922 gcc_unreachable ();
14925 /* The remaining cases only occur when both comparisons are the
14926 same. */
14927 case NE:
14928 gcc_assert (cond1 == cond2);
14929 return CC_DNEmode;
14931 case LE:
14932 gcc_assert (cond1 == cond2);
14933 return CC_DLEmode;
14935 case GE:
14936 gcc_assert (cond1 == cond2);
14937 return CC_DGEmode;
14939 case LEU:
14940 gcc_assert (cond1 == cond2);
14941 return CC_DLEUmode;
14943 case GEU:
14944 gcc_assert (cond1 == cond2);
14945 return CC_DGEUmode;
14947 default:
14948 gcc_unreachable ();
14952 machine_mode
14953 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14955 /* All floating point compares return CCFP if it is an equality
14956 comparison, and CCFPE otherwise. */
14957 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14959 switch (op)
14961 case EQ:
14962 case NE:
14963 case UNORDERED:
14964 case ORDERED:
14965 case UNLT:
14966 case UNLE:
14967 case UNGT:
14968 case UNGE:
14969 case UNEQ:
14970 case LTGT:
14971 return CCFPmode;
14973 case LT:
14974 case LE:
14975 case GT:
14976 case GE:
14977 return CCFPEmode;
14979 default:
14980 gcc_unreachable ();
14984 /* A compare with a shifted operand. Because of canonicalization, the
14985 comparison will have to be swapped when we emit the assembler. */
14986 if (GET_MODE (y) == SImode
14987 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14988 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14989 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14990 || GET_CODE (x) == ROTATERT))
14991 return CC_SWPmode;
14993 /* This operation is performed swapped, but since we only rely on the Z
14994 flag we don't need an additional mode. */
14995 if (GET_MODE (y) == SImode
14996 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14997 && GET_CODE (x) == NEG
14998 && (op == EQ || op == NE))
14999 return CC_Zmode;
15001 /* This is a special case that is used by combine to allow a
15002 comparison of a shifted byte load to be split into a zero-extend
15003 followed by a comparison of the shifted integer (only valid for
15004 equalities and unsigned inequalities). */
15005 if (GET_MODE (x) == SImode
15006 && GET_CODE (x) == ASHIFT
15007 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15008 && GET_CODE (XEXP (x, 0)) == SUBREG
15009 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15010 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15011 && (op == EQ || op == NE
15012 || op == GEU || op == GTU || op == LTU || op == LEU)
15013 && CONST_INT_P (y))
15014 return CC_Zmode;
15016 /* A construct for a conditional compare, if the false arm contains
15017 0, then both conditions must be true, otherwise either condition
15018 must be true. Not all conditions are possible, so CCmode is
15019 returned if it can't be done. */
15020 if (GET_CODE (x) == IF_THEN_ELSE
15021 && (XEXP (x, 2) == const0_rtx
15022 || XEXP (x, 2) == const1_rtx)
15023 && COMPARISON_P (XEXP (x, 0))
15024 && COMPARISON_P (XEXP (x, 1)))
15025 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15026 INTVAL (XEXP (x, 2)));
15028 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15029 if (GET_CODE (x) == AND
15030 && (op == EQ || op == NE)
15031 && COMPARISON_P (XEXP (x, 0))
15032 && COMPARISON_P (XEXP (x, 1)))
15033 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15034 DOM_CC_X_AND_Y);
15036 if (GET_CODE (x) == IOR
15037 && (op == EQ || op == NE)
15038 && COMPARISON_P (XEXP (x, 0))
15039 && COMPARISON_P (XEXP (x, 1)))
15040 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15041 DOM_CC_X_OR_Y);
15043 /* An operation (on Thumb) where we want to test for a single bit.
15044 This is done by shifting that bit up into the top bit of a
15045 scratch register; we can then branch on the sign bit. */
15046 if (TARGET_THUMB1
15047 && GET_MODE (x) == SImode
15048 && (op == EQ || op == NE)
15049 && GET_CODE (x) == ZERO_EXTRACT
15050 && XEXP (x, 1) == const1_rtx)
15051 return CC_Nmode;
15053 /* An operation that sets the condition codes as a side-effect, the
15054 V flag is not set correctly, so we can only use comparisons where
15055 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15056 instead.) */
15057 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15058 if (GET_MODE (x) == SImode
15059 && y == const0_rtx
15060 && (op == EQ || op == NE || op == LT || op == GE)
15061 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15062 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15063 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15064 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15065 || GET_CODE (x) == LSHIFTRT
15066 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15067 || GET_CODE (x) == ROTATERT
15068 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15069 return CC_NOOVmode;
15071 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15072 return CC_Zmode;
15074 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15075 && GET_CODE (x) == PLUS
15076 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15077 return CC_Cmode;
15079 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15081 switch (op)
15083 case EQ:
15084 case NE:
15085 /* A DImode comparison against zero can be implemented by
15086 or'ing the two halves together. */
15087 if (y == const0_rtx)
15088 return CC_Zmode;
15090 /* We can do an equality test in three Thumb instructions. */
15091 if (!TARGET_32BIT)
15092 return CC_Zmode;
15094 /* FALLTHROUGH */
15096 case LTU:
15097 case LEU:
15098 case GTU:
15099 case GEU:
15100 /* DImode unsigned comparisons can be implemented by cmp +
15101 cmpeq without a scratch register. Not worth doing in
15102 Thumb-2. */
15103 if (TARGET_32BIT)
15104 return CC_CZmode;
15106 /* FALLTHROUGH */
15108 case LT:
15109 case LE:
15110 case GT:
15111 case GE:
15112 /* DImode signed and unsigned comparisons can be implemented
15113 by cmp + sbcs with a scratch register, but that does not
15114 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15115 gcc_assert (op != EQ && op != NE);
15116 return CC_NCVmode;
15118 default:
15119 gcc_unreachable ();
15123 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15124 return GET_MODE (x);
15126 return CCmode;
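/* A few concrete mappings from the cases above: a floating-point equality
   test gives CCFPmode and a floating-point ordering test gives CCFPEmode;
   comparing a register against a shifted operand gives CC_SWPmode; an EQ/NE
   test of a DImode value against zero gives CC_Zmode; and a DImode unsigned
   comparison on a 32-bit target gives CC_CZmode.  */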
15129 /* X and Y are two things to compare using CODE. Emit the compare insn and
15130    return the rtx for the CC register in the proper mode.  SCRATCH, if nonnull,
15131    may be clobbered as a scratch register for DImode comparisons.  */
15132 rtx
15133 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15135 machine_mode mode;
15136 rtx cc_reg;
15137 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15139 /* We might have X as a constant, Y as a register because of the predicates
15140 used for cmpdi. If so, force X to a register here. */
15141 if (dimode_comparison && !REG_P (x))
15142 x = force_reg (DImode, x);
15144 mode = SELECT_CC_MODE (code, x, y);
15145 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15147 if (dimode_comparison
15148 && mode != CC_CZmode)
15150 rtx clobber, set;
15152 /* To compare two non-zero values for equality, XOR them and
15153 then compare against zero. Not used for ARM mode; there
15154 CC_CZmode is cheaper. */
15155 if (mode == CC_Zmode && y != const0_rtx)
15157 gcc_assert (!reload_completed);
15158 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15159 y = const0_rtx;
15162 /* A scratch register is required. */
15163 if (reload_completed)
15164 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15165 else
15166 scratch = gen_rtx_SCRATCH (SImode);
15168 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15169 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15170 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15172 else
15173 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15175 return cc_reg;
15178 /* Generate a sequence of insns that will generate the correct return
15179 address mask depending on the physical architecture that the program
15180 is running on. */
15181 rtx
15182 arm_gen_return_addr_mask (void)
15184 rtx reg = gen_reg_rtx (Pmode);
15186 emit_insn (gen_return_addr_mask (reg));
15187 return reg;
15190 void
15191 arm_reload_in_hi (rtx *operands)
15193 rtx ref = operands[1];
15194 rtx base, scratch;
15195 HOST_WIDE_INT offset = 0;
15197 if (GET_CODE (ref) == SUBREG)
15199 offset = SUBREG_BYTE (ref);
15200 ref = SUBREG_REG (ref);
15203 if (REG_P (ref))
15205 /* We have a pseudo which has been spilt onto the stack; there
15206 are two cases here: the first where there is a simple
15207 stack-slot replacement and a second where the stack-slot is
15208 out of range, or is used as a subreg. */
15209 if (reg_equiv_mem (REGNO (ref)))
15211 ref = reg_equiv_mem (REGNO (ref));
15212 base = find_replacement (&XEXP (ref, 0));
15214 else
15215 /* The slot is out of range, or was dressed up in a SUBREG. */
15216 base = reg_equiv_address (REGNO (ref));
15218 else
15219 base = find_replacement (&XEXP (ref, 0));
15221 /* Handle the case where the address is too complex to be offset by 1. */
15222 if (GET_CODE (base) == MINUS
15223 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15225 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15227 emit_set_insn (base_plus, base);
15228 base = base_plus;
15230 else if (GET_CODE (base) == PLUS)
15232 /* The addend must be CONST_INT, or we would have dealt with it above. */
15233 HOST_WIDE_INT hi, lo;
15235 offset += INTVAL (XEXP (base, 1));
15236 base = XEXP (base, 0);
15238 /* Rework the address into a legal sequence of insns. */
15239 /* Valid range for lo is -4095 -> 4095 */
15240 lo = (offset >= 0
15241 ? (offset & 0xfff)
15242 : -((-offset) & 0xfff));
15244 /* Corner case, if lo is the max offset then we would be out of range
15245 once we have added the additional 1 below, so bump the msb into the
15246 pre-loading insn(s). */
15247 if (lo == 4095)
15248 lo &= 0x7ff;
15250 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15251 ^ (HOST_WIDE_INT) 0x80000000)
15252 - (HOST_WIDE_INT) 0x80000000);
15254 gcc_assert (hi + lo == offset);
15256 if (hi != 0)
15258 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15260 /* Get the base address; addsi3 knows how to handle constants
15261 that require more than one insn. */
15262 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15263 base = base_plus;
15264 offset = lo;
15268 /* Operands[2] may overlap operands[0] (though it won't overlap
15269 operands[1]), that's why we asked for a DImode reg -- so we can
15270 use the bit that does not overlap. */
15271 if (REGNO (operands[2]) == REGNO (operands[0]))
15272 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15273 else
15274 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15276 emit_insn (gen_zero_extendqisi2 (scratch,
15277 gen_rtx_MEM (QImode,
15278 plus_constant (Pmode, base,
15279 offset))));
15280 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15281 gen_rtx_MEM (QImode,
15282 plus_constant (Pmode, base,
15283 offset + 1))));
15284 if (!BYTES_BIG_ENDIAN)
15285 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15286 gen_rtx_IOR (SImode,
15287 gen_rtx_ASHIFT
15288 (SImode,
15289 gen_rtx_SUBREG (SImode, operands[0], 0),
15290 GEN_INT (8)),
15291 scratch));
15292 else
15293 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15294 gen_rtx_IOR (SImode,
15295 gen_rtx_ASHIFT (SImode, scratch,
15296 GEN_INT (8)),
15297 gen_rtx_SUBREG (SImode, operands[0], 0)));
15300 /* Handle storing a half-word to memory during reload by synthesizing it as two
15301 byte stores. Take care not to clobber the input values until after we
15302 have moved them somewhere safe. This code assumes that if the DImode
15303 scratch in operands[2] overlaps either the input value or output address
15304 in some way, then that value must die in this insn (we absolutely need
15305 two scratch registers for some corner cases). */
15306 void
15307 arm_reload_out_hi (rtx *operands)
15309 rtx ref = operands[0];
15310 rtx outval = operands[1];
15311 rtx base, scratch;
15312 HOST_WIDE_INT offset = 0;
15314 if (GET_CODE (ref) == SUBREG)
15316 offset = SUBREG_BYTE (ref);
15317 ref = SUBREG_REG (ref);
15320 if (REG_P (ref))
15322 /* We have a pseudo which has been spilt onto the stack; there
15323 are two cases here: the first where there is a simple
15324 stack-slot replacement and a second where the stack-slot is
15325 out of range, or is used as a subreg. */
15326 if (reg_equiv_mem (REGNO (ref)))
15328 ref = reg_equiv_mem (REGNO (ref));
15329 base = find_replacement (&XEXP (ref, 0));
15331 else
15332 /* The slot is out of range, or was dressed up in a SUBREG. */
15333 base = reg_equiv_address (REGNO (ref));
15335 else
15336 base = find_replacement (&XEXP (ref, 0));
15338 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15340 /* Handle the case where the address is too complex to be offset by 1. */
15341 if (GET_CODE (base) == MINUS
15342 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15344 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15346 /* Be careful not to destroy OUTVAL. */
15347 if (reg_overlap_mentioned_p (base_plus, outval))
15349 /* Updating base_plus might destroy outval, see if we can
15350 swap the scratch and base_plus. */
15351 if (!reg_overlap_mentioned_p (scratch, outval))
15352 std::swap (scratch, base_plus);
15353 else
15355 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15357 /* Be conservative and copy OUTVAL into the scratch now,
15358 this should only be necessary if outval is a subreg
15359 of something larger than a word. */
15360 /* XXX Might this clobber base? I can't see how it can,
15361 since scratch is known to overlap with OUTVAL, and
15362 must be wider than a word. */
15363 emit_insn (gen_movhi (scratch_hi, outval));
15364 outval = scratch_hi;
15368 emit_set_insn (base_plus, base);
15369 base = base_plus;
15371 else if (GET_CODE (base) == PLUS)
15373 /* The addend must be CONST_INT, or we would have dealt with it above. */
15374 HOST_WIDE_INT hi, lo;
15376 offset += INTVAL (XEXP (base, 1));
15377 base = XEXP (base, 0);
15379 /* Rework the address into a legal sequence of insns. */
15380 /* Valid range for lo is -4095 -> 4095 */
15381 lo = (offset >= 0
15382 ? (offset & 0xfff)
15383 : -((-offset) & 0xfff));
15385 /* Corner case, if lo is the max offset then we would be out of range
15386 once we have added the additional 1 below, so bump the msb into the
15387 pre-loading insn(s). */
15388 if (lo == 4095)
15389 lo &= 0x7ff;
15391 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15392 ^ (HOST_WIDE_INT) 0x80000000)
15393 - (HOST_WIDE_INT) 0x80000000);
15395 gcc_assert (hi + lo == offset);
15397 if (hi != 0)
15399 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15401 /* Be careful not to destroy OUTVAL. */
15402 if (reg_overlap_mentioned_p (base_plus, outval))
15404 /* Updating base_plus might destroy outval, see if we
15405 can swap the scratch and base_plus. */
15406 if (!reg_overlap_mentioned_p (scratch, outval))
15407 std::swap (scratch, base_plus);
15408 else
15410 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15412 /* Be conservative and copy outval into scratch now,
15413 this should only be necessary if outval is a
15414 subreg of something larger than a word. */
15415 /* XXX Might this clobber base? I can't see how it
15416 can, since scratch is known to overlap with
15417 outval. */
15418 emit_insn (gen_movhi (scratch_hi, outval));
15419 outval = scratch_hi;
15423 /* Get the base address; addsi3 knows how to handle constants
15424 that require more than one insn. */
15425 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15426 base = base_plus;
15427 offset = lo;
15431 if (BYTES_BIG_ENDIAN)
15433 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15434 plus_constant (Pmode, base,
15435 offset + 1)),
15436 gen_lowpart (QImode, outval)));
15437 emit_insn (gen_lshrsi3 (scratch,
15438 gen_rtx_SUBREG (SImode, outval, 0),
15439 GEN_INT (8)));
15440 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15441 offset)),
15442 gen_lowpart (QImode, scratch)));
15444 else
15446 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15447 offset)),
15448 gen_lowpart (QImode, outval)));
15449 emit_insn (gen_lshrsi3 (scratch,
15450 gen_rtx_SUBREG (SImode, outval, 0),
15451 GEN_INT (8)));
15452 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15453 plus_constant (Pmode, base,
15454 offset + 1)),
15455 gen_lowpart (QImode, scratch)));
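/* In other words, the halfword store is emitted as two byte stores plus a
   logical shift right by 8 to extract the high byte; the ordering above
   puts the least significant byte at the lower address for little-endian
   and at the higher address for big-endian.  */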
15459 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15460 (padded to the size of a word) should be passed in a register. */
15462 static bool
15463 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15465 if (TARGET_AAPCS_BASED)
15466 return must_pass_in_stack_var_size (mode, type);
15467 else
15468 return must_pass_in_stack_var_size_or_pad (mode, type);
15472 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15473 Return true if an argument passed on the stack should be padded upwards,
15474 i.e. if the least-significant byte has useful data.
15475 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15476 aggregate types are placed in the lowest memory address. */
15478 bool
15479 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15481 if (!TARGET_AAPCS_BASED)
15482 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15484 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15485 return false;
15487 return true;
15491 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15492 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15493 register has useful data, and return the opposite if the most
15494 significant byte does. */
15496 bool
15497 arm_pad_reg_upward (machine_mode mode,
15498 tree type, int first ATTRIBUTE_UNUSED)
15500 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15502 /* For AAPCS, small aggregates, small fixed-point types,
15503 and small complex types are always padded upwards. */
15504 if (type)
15506 if ((AGGREGATE_TYPE_P (type)
15507 || TREE_CODE (type) == COMPLEX_TYPE
15508 || FIXED_POINT_TYPE_P (type))
15509 && int_size_in_bytes (type) <= 4)
15510 return true;
15512 else
15514 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15515 && GET_MODE_SIZE (mode) <= 4)
15516 return true;
15520 /* Otherwise, use default padding. */
15521 return !BYTES_BIG_ENDIAN;
15524 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15525 assuming that the address in the base register is word aligned. */
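/* These limits mirror the instruction encodings: in ARM state LDRD/STRD
   take an 8-bit byte offset (so +/-255), while the Thumb-2 encoding
   scales an 8-bit immediate by four, giving +/-1020 in multiples of 4.  */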
15526 bool
15527 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15529 HOST_WIDE_INT max_offset;
15531 /* Offset must be a multiple of 4 in Thumb mode. */
15532 if (TARGET_THUMB2 && ((offset & 3) != 0))
15533 return false;
15535 if (TARGET_THUMB2)
15536 max_offset = 1020;
15537 else if (TARGET_ARM)
15538 max_offset = 255;
15539 else
15540 return false;
15542 return ((offset <= max_offset) && (offset >= -max_offset));
15545 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15546 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15547 Assumes that the address in the base register RN is word aligned. Pattern
15548 guarantees that both memory accesses use the same base register,
15549 the offsets are constants within the range, and the gap between the offsets is 4.
15550 If reload is complete then check that the registers are legal. WBACK indicates whether
15551 address is updated. LOAD indicates whether memory access is load or store. */
15552 bool
15553 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15554 bool wback, bool load)
15556 unsigned int t, t2, n;
15558 if (!reload_completed)
15559 return true;
15561 if (!offset_ok_for_ldrd_strd (offset))
15562 return false;
15564 t = REGNO (rt);
15565 t2 = REGNO (rt2);
15566 n = REGNO (rn);
15568 if ((TARGET_THUMB2)
15569 && ((wback && (n == t || n == t2))
15570 || (t == SP_REGNUM)
15571 || (t == PC_REGNUM)
15572 || (t2 == SP_REGNUM)
15573 || (t2 == PC_REGNUM)
15574 || (!load && (n == PC_REGNUM))
15575 || (load && (t == t2))
15576 /* Triggers Cortex-M3 LDRD errata. */
15577 || (!wback && load && fix_cm3_ldrd && (n == t))))
15578 return false;
15580 if ((TARGET_ARM)
15581 && ((wback && (n == t || n == t2))
15582 || (t2 == PC_REGNUM)
15583 || (t % 2 != 0) /* First destination register is not even. */
15584 || (t2 != t + 1)
15585 /* PC can be used as base register (for offset addressing only),
15586 but it is deprecated. */
15587 || (n == PC_REGNUM)))
15588 return false;
15590 return true;
15593 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15594 operand MEM's address contains an immediate offset from the base
15595 register and has no side effects, in which case it sets BASE and
15596 OFFSET accordingly. */
15597 static bool
15598 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15600 rtx addr;
15602 gcc_assert (base != NULL && offset != NULL);
15604 /* TODO: Handle more general memory operand patterns, such as
15605 PRE_DEC and PRE_INC. */
15607 if (side_effects_p (mem))
15608 return false;
15610 /* Can't deal with subregs. */
15611 if (GET_CODE (mem) == SUBREG)
15612 return false;
15614 gcc_assert (MEM_P (mem));
15616 *offset = const0_rtx;
15618 addr = XEXP (mem, 0);
15620 /* If addr isn't valid for DImode, then we can't handle it. */
15621 if (!arm_legitimate_address_p (DImode, addr,
15622 reload_in_progress || reload_completed))
15623 return false;
15625 if (REG_P (addr))
15627 *base = addr;
15628 return true;
15630 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15632 *base = XEXP (addr, 0);
15633 *offset = XEXP (addr, 1);
15634 return (REG_P (*base) && CONST_INT_P (*offset));
15637 return false;
15640 /* Called from a peephole2 to replace two word-size accesses with a
15641 single LDRD/STRD instruction. Returns true iff we can generate a
15642 new instruction sequence. That is, both accesses use the same base
15643 register and the gap between constant offsets is 4. This function
15644 may reorder its operands to match ldrd/strd RTL templates.
15645 OPERANDS are the operands found by the peephole matcher;
15646 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15647 corresponding memory operands. LOAD indicates whether the access
15648 is load or store. CONST_STORE indicates a store of constant
15649 integer values held in OPERANDS[4,5] and assumes that the pattern
15650 is 4 insns long, for the purpose of checking dead registers.
15651 COMMUTE indicates that register operands may be reordered. */
15652 bool
15653 gen_operands_ldrd_strd (rtx *operands, bool load,
15654 bool const_store, bool commute)
15656 int nops = 2;
15657 HOST_WIDE_INT offsets[2], offset;
15658 rtx base = NULL_RTX;
15659 rtx cur_base, cur_offset, tmp;
15660 int i, gap;
15661 HARD_REG_SET regset;
15663 gcc_assert (!const_store || !load);
15664 /* Check that the memory references are immediate offsets from the
15665 same base register. Extract the base register, the destination
15666 registers, and the corresponding memory offsets. */
15667 for (i = 0; i < nops; i++)
15669 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15670 return false;
15672 if (i == 0)
15673 base = cur_base;
15674 else if (REGNO (base) != REGNO (cur_base))
15675 return false;
15677 offsets[i] = INTVAL (cur_offset);
15678 if (GET_CODE (operands[i]) == SUBREG)
15680 tmp = SUBREG_REG (operands[i]);
15681 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15682 operands[i] = tmp;
15686 /* Make sure there is no dependency between the individual loads. */
15687 if (load && REGNO (operands[0]) == REGNO (base))
15688 return false; /* RAW */
15690 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15691 return false; /* WAW */
15693 /* If the same input register is used in both stores
15694 when storing different constants, try to find a free register.
15695 For example, the code
15696 mov r0, 0
15697 str r0, [r2]
15698 mov r0, 1
15699 str r0, [r2, #4]
15700 can be transformed into
15701 mov r1, 0
15702 strd r1, r0, [r2]
15703 in Thumb mode assuming that r1 is free. */
15704 if (const_store
15705 && REGNO (operands[0]) == REGNO (operands[1])
15706 && INTVAL (operands[4]) != INTVAL (operands[5]))
15708 if (TARGET_THUMB2)
15710 CLEAR_HARD_REG_SET (regset);
15711 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15712 if (tmp == NULL_RTX)
15713 return false;
15715 /* Use the new register in the first load to ensure that
15716 if the original input register is not dead after peephole,
15717 then it will have the correct constant value. */
15718 operands[0] = tmp;
15720 else if (TARGET_ARM)
15723 int regno = REGNO (operands[0]);
15724 if (!peep2_reg_dead_p (4, operands[0]))
15726 /* When the input register is even and is not dead after the
15727 pattern, it has to hold the second constant but we cannot
15728 form a legal STRD in ARM mode with this register as the second
15729 register. */
15730 if (regno % 2 == 0)
15731 return false;
15733 /* Is regno-1 free? */
15734 SET_HARD_REG_SET (regset);
15735 CLEAR_HARD_REG_BIT(regset, regno - 1);
15736 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15737 if (tmp == NULL_RTX)
15738 return false;
15740 operands[0] = tmp;
15742 else
15744 /* Find a DImode register. */
15745 CLEAR_HARD_REG_SET (regset);
15746 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15747 if (tmp != NULL_RTX)
15749 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15750 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15752 else
15754 /* Can we use the input register to form a DI register? */
15755 SET_HARD_REG_SET (regset);
15756 CLEAR_HARD_REG_BIT(regset,
15757 regno % 2 == 0 ? regno + 1 : regno - 1);
15758 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15759 if (tmp == NULL_RTX)
15760 return false;
15761 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15765 gcc_assert (operands[0] != NULL_RTX);
15766 gcc_assert (operands[1] != NULL_RTX);
15767 gcc_assert (REGNO (operands[0]) % 2 == 0);
15768 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15772 /* Make sure the instructions are ordered with lower memory access first. */
15773 if (offsets[0] > offsets[1])
15775 gap = offsets[0] - offsets[1];
15776 offset = offsets[1];
15778 /* Swap the instructions such that lower memory is accessed first. */
15779 std::swap (operands[0], operands[1]);
15780 std::swap (operands[2], operands[3]);
15781 if (const_store)
15782 std::swap (operands[4], operands[5]);
15784 else
15786 gap = offsets[1] - offsets[0];
15787 offset = offsets[0];
15790 /* Make sure accesses are to consecutive memory locations. */
15791 if (gap != 4)
15792 return false;
15794 /* Make sure we generate legal instructions. */
15795 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15796 false, load))
15797 return true;
15799 /* In Thumb state, where registers are almost unconstrained, there
15800 is little hope to fix it. */
15801 if (TARGET_THUMB2)
15802 return false;
15804 if (load && commute)
15806 /* Try reordering registers. */
15807 std::swap (operands[0], operands[1]);
15808 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15809 false, load))
15810 return true;
15813 if (const_store)
15815 /* If input registers are dead after this pattern, they can be
15816 reordered or replaced by other registers that are free in the
15817 current pattern. */
15818 if (!peep2_reg_dead_p (4, operands[0])
15819 || !peep2_reg_dead_p (4, operands[1]))
15820 return false;
15822 /* Try to reorder the input registers. */
15823 /* For example, the code
15824 mov r0, 0
15825 mov r1, 1
15826 str r1, [r2]
15827 str r0, [r2, #4]
15828 can be transformed into
15829 mov r1, 0
15830 mov r0, 1
15831 strd r0, [r2]
15833 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15834 false, false))
15836 std::swap (operands[0], operands[1]);
15837 return true;
15840 /* Try to find a free DI register. */
15841 CLEAR_HARD_REG_SET (regset);
15842 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15843 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15844 while (true)
15846 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15847 if (tmp == NULL_RTX)
15848 return false;
15850 /* DREG must be an even-numbered register in DImode.
15851 Split it into SI registers. */
15852 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15853 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15854 gcc_assert (operands[0] != NULL_RTX);
15855 gcc_assert (operands[1] != NULL_RTX);
15856 gcc_assert (REGNO (operands[0]) % 2 == 0);
15857 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15859 return (operands_ok_ldrd_strd (operands[0], operands[1],
15860 base, offset,
15861 false, load));
15865 return false;
15871 /* Print a symbolic form of X to the debug file, F. */
15872 static void
15873 arm_print_value (FILE *f, rtx x)
15875 switch (GET_CODE (x))
15877 case CONST_INT:
15878 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15879 return;
15881 case CONST_DOUBLE:
15882 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15883 return;
15885 case CONST_VECTOR:
15887 int i;
15889 fprintf (f, "<");
15890 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15892 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15893 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15894 fputc (',', f);
15896 fprintf (f, ">");
15898 return;
15900 case CONST_STRING:
15901 fprintf (f, "\"%s\"", XSTR (x, 0));
15902 return;
15904 case SYMBOL_REF:
15905 fprintf (f, "`%s'", XSTR (x, 0));
15906 return;
15908 case LABEL_REF:
15909 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15910 return;
15912 case CONST:
15913 arm_print_value (f, XEXP (x, 0));
15914 return;
15916 case PLUS:
15917 arm_print_value (f, XEXP (x, 0));
15918 fprintf (f, "+");
15919 arm_print_value (f, XEXP (x, 1));
15920 return;
15922 case PC:
15923 fprintf (f, "pc");
15924 return;
15926 default:
15927 fprintf (f, "????");
15928 return;
15932 /* Routines for manipulation of the constant pool. */
15934 /* Arm instructions cannot load a large constant directly into a
15935 register; they have to come from a pc relative load. The constant
15936 must therefore be placed in the addressable range of the pc
15937 relative load. Depending on the precise pc relative load
15938 instruction the range is somewhere between 256 bytes and 4k. This
15939 means that we often have to dump a constant inside a function, and
15940 generate code to branch around it.
15942 It is important to minimize this, since the branches will slow
15943 things down and make the code larger.
15945 Normally we can hide the table after an existing unconditional
15946 branch so that there is no interruption of the flow, but in the
15947 worst case the code looks like this:
15949 ldr rn, L1
15951 b L2
15952 align
15953 L1: .long value
15957 ldr rn, L3
15959 b L4
15960 align
15961 L3: .long value
15965 We fix this by performing a scan after scheduling, which notices
15966 which instructions need to have their operands fetched from the
15967 constant table and builds the table.
15969 The algorithm starts by building a table of all the constants that
15970 need fixing up and all the natural barriers in the function (places
15971 where a constant table can be dropped without breaking the flow).
15972 For each fixup we note how far the pc-relative replacement will be
15973 able to reach and the offset of the instruction into the function.
15975 Having built the table we then group the fixes together to form
15976 tables that are as large as possible (subject to addressing
15977 constraints) and emit each table of constants after the last
15978 barrier that is within range of all the instructions in the group.
15979 If a group does not contain a barrier, then we forcibly create one
15980 by inserting a jump instruction into the flow. Once the table has
15981 been inserted, the insns are then modified to reference the
15982 relevant entry in the pool.
15984 Possible enhancements to the algorithm (not implemented) are:
15986 1) For some processors and object formats, there may be benefit in
15987 aligning the pools to the start of cache lines; this alignment
15988 would need to be taken into account when calculating addressability
15989 of a pool. */
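/* In outline: arm_reorg (below) scans the insns and calls
   push_minipool_fix / push_minipool_barrier to record the work;
   add_minipool_forward_ref and add_minipool_backward_ref then build up
   each pool, create_fix_barrier inserts a jump around a pool when no
   natural barrier exists, and dump_minipool finally emits the table.  */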
15991 /* These typedefs are located at the start of this file, so that
15992 they can be used in the prototypes there. This comment is to
15993 remind readers of that fact so that the following structures
15994 can be understood more easily.
15996 typedef struct minipool_node Mnode;
15997 typedef struct minipool_fixup Mfix; */
15999 struct minipool_node
16001 /* Doubly linked chain of entries. */
16002 Mnode * next;
16003 Mnode * prev;
16004 /* The maximum offset into the code that this entry can be placed. While
16005 pushing fixes for forward references, all entries are sorted in order
16006 of increasing max_address. */
16007 HOST_WIDE_INT max_address;
16008 /* Similarly for an entry inserted for a backwards ref. */
16009 HOST_WIDE_INT min_address;
16010 /* The number of fixes referencing this entry. This can become zero
16011 if we "unpush" an entry. In this case we ignore the entry when we
16012 come to emit the code. */
16013 int refcount;
16014 /* The offset from the start of the minipool. */
16015 HOST_WIDE_INT offset;
16016 /* The value in table. */
16017 rtx value;
16018 /* The mode of value. */
16019 machine_mode mode;
16020 /* The size of the value. With iWMMXt enabled
16021 sizes > 4 also imply an alignment of 8-bytes. */
16022 int fix_size;
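/* A minipool fixup records one instruction operand that must be rewritten
   to load its value from the pool: the insn and its address within the
   function, the location (LOC) of the operand to patch, the constant's
   value, mode and padded size, the pool entry eventually assigned to it,
   and how far forwards and backwards the insn's pc-relative addressing
   can reach.  */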
16025 struct minipool_fixup
16027 Mfix * next;
16028 rtx_insn * insn;
16029 HOST_WIDE_INT address;
16030 rtx * loc;
16031 machine_mode mode;
16032 int fix_size;
16033 rtx value;
16034 Mnode * minipool;
16035 HOST_WIDE_INT forwards;
16036 HOST_WIDE_INT backwards;
16039 /* Fixes less than a word need padding out to a word boundary. */
16040 #define MINIPOOL_FIX_SIZE(mode) \
16041 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
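/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 even though the value only
   occupies two bytes, while MINIPOOL_FIX_SIZE (DImode) is 8.  */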
16043 static Mnode * minipool_vector_head;
16044 static Mnode * minipool_vector_tail;
16045 static rtx_code_label *minipool_vector_label;
16046 static int minipool_pad;
16048 /* The linked list of all minipool fixes required for this function. */
16049 Mfix * minipool_fix_head;
16050 Mfix * minipool_fix_tail;
16051 /* The fix entry for the current minipool, once it has been placed. */
16052 Mfix * minipool_barrier;
16054 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16055 #define JUMP_TABLES_IN_TEXT_SECTION 0
16056 #endif
16058 static HOST_WIDE_INT
16059 get_jump_table_size (rtx_jump_table_data *insn)
16061 /* ADDR_VECs only take room if read-only data goes into the text
16062 section. */
16063 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16065 rtx body = PATTERN (insn);
16066 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16067 HOST_WIDE_INT size;
16068 HOST_WIDE_INT modesize;
16070 modesize = GET_MODE_SIZE (GET_MODE (body));
16071 size = modesize * XVECLEN (body, elt);
16072 switch (modesize)
16074 case 1:
16075 /* Round up size of TBB table to a halfword boundary. */
16076 size = (size + 1) & ~(HOST_WIDE_INT)1;
16077 break;
16078 case 2:
16079 /* No padding necessary for TBH. */
16080 break;
16081 case 4:
16082 /* Add two bytes for alignment on Thumb. */
16083 if (TARGET_THUMB)
16084 size += 2;
16085 break;
16086 default:
16087 gcc_unreachable ();
16089 return size;
16092 return 0;
16095 /* Return the maximum amount of padding that will be inserted before
16096 label LABEL. */
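/* For example, a label aligned to 8 bytes in Thumb code (minimum insn
   size 2) may be preceded by up to 6 bytes of padding.  */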
16098 static HOST_WIDE_INT
16099 get_label_padding (rtx label)
16101 HOST_WIDE_INT align, min_insn_size;
16103 align = 1 << label_to_alignment (label);
16104 min_insn_size = TARGET_THUMB ? 2 : 4;
16105 return align > min_insn_size ? align - min_insn_size : 0;
16108 /* Move a minipool fix MP from its current location to before MAX_MP.
16109 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16110 constraints may need updating. */
16111 static Mnode *
16112 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16113 HOST_WIDE_INT max_address)
16115 /* The code below assumes these are different. */
16116 gcc_assert (mp != max_mp);
16118 if (max_mp == NULL)
16120 if (max_address < mp->max_address)
16121 mp->max_address = max_address;
16123 else
16125 if (max_address > max_mp->max_address - mp->fix_size)
16126 mp->max_address = max_mp->max_address - mp->fix_size;
16127 else
16128 mp->max_address = max_address;
16130 /* Unlink MP from its current position. Since max_mp is non-null,
16131 mp->prev must be non-null. */
16132 mp->prev->next = mp->next;
16133 if (mp->next != NULL)
16134 mp->next->prev = mp->prev;
16135 else
16136 minipool_vector_tail = mp->prev;
16138 /* Re-insert it before MAX_MP. */
16139 mp->next = max_mp;
16140 mp->prev = max_mp->prev;
16141 max_mp->prev = mp;
16143 if (mp->prev != NULL)
16144 mp->prev->next = mp;
16145 else
16146 minipool_vector_head = mp;
16149 /* Save the new entry. */
16150 max_mp = mp;
16152 /* Scan over the preceding entries and adjust their addresses as
16153 required. */
16154 while (mp->prev != NULL
16155 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16157 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16158 mp = mp->prev;
16161 return max_mp;
16164 /* Add a constant to the minipool for a forward reference. Returns the
16165 node added or NULL if the constant will not fit in this pool. */
16166 static Mnode *
16167 add_minipool_forward_ref (Mfix *fix)
16169 /* If set, max_mp is the first pool_entry that has a lower
16170 constraint than the one we are trying to add. */
16171 Mnode * max_mp = NULL;
16172 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16173 Mnode * mp;
16175 /* If the minipool starts before the end of FIX->INSN then this FIX
16176 can not be placed into the current pool. Furthermore, adding the
16177 new constant pool entry may cause the pool to start FIX_SIZE bytes
16178 earlier. */
16179 if (minipool_vector_head &&
16180 (fix->address + get_attr_length (fix->insn)
16181 >= minipool_vector_head->max_address - fix->fix_size))
16182 return NULL;
16184 /* Scan the pool to see if a constant with the same value has
16185 already been added. While we are doing this, also note the
16186 location where we must insert the constant if it doesn't already
16187 exist. */
16188 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16190 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16191 && fix->mode == mp->mode
16192 && (!LABEL_P (fix->value)
16193 || (CODE_LABEL_NUMBER (fix->value)
16194 == CODE_LABEL_NUMBER (mp->value)))
16195 && rtx_equal_p (fix->value, mp->value))
16197 /* More than one fix references this entry. */
16198 mp->refcount++;
16199 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16202 /* Note the insertion point if necessary. */
16203 if (max_mp == NULL
16204 && mp->max_address > max_address)
16205 max_mp = mp;
16207 /* If we are inserting an 8-byte aligned quantity and
16208 we have not already found an insertion point, then
16209 make sure that all such 8-byte aligned quantities are
16210 placed at the start of the pool. */
16211 if (ARM_DOUBLEWORD_ALIGN
16212 && max_mp == NULL
16213 && fix->fix_size >= 8
16214 && mp->fix_size < 8)
16216 max_mp = mp;
16217 max_address = mp->max_address;
16221 /* The value is not currently in the minipool, so we need to create
16222 a new entry for it. If MAX_MP is NULL, the entry will be put on
16223 the end of the list since the placement is less constrained than
16224 any existing entry. Otherwise, we insert the new fix before
16225 MAX_MP and, if necessary, adjust the constraints on the other
16226 entries. */
16227 mp = XNEW (Mnode);
16228 mp->fix_size = fix->fix_size;
16229 mp->mode = fix->mode;
16230 mp->value = fix->value;
16231 mp->refcount = 1;
16232 /* Not yet required for a backwards ref. */
16233 mp->min_address = -65536;
16235 if (max_mp == NULL)
16237 mp->max_address = max_address;
16238 mp->next = NULL;
16239 mp->prev = minipool_vector_tail;
16241 if (mp->prev == NULL)
16243 minipool_vector_head = mp;
16244 minipool_vector_label = gen_label_rtx ();
16246 else
16247 mp->prev->next = mp;
16249 minipool_vector_tail = mp;
16251 else
16253 if (max_address > max_mp->max_address - mp->fix_size)
16254 mp->max_address = max_mp->max_address - mp->fix_size;
16255 else
16256 mp->max_address = max_address;
16258 mp->next = max_mp;
16259 mp->prev = max_mp->prev;
16260 max_mp->prev = mp;
16261 if (mp->prev != NULL)
16262 mp->prev->next = mp;
16263 else
16264 minipool_vector_head = mp;
16267 /* Save the new entry. */
16268 max_mp = mp;
16270 /* Scan over the preceding entries and adjust their addresses as
16271 required. */
16272 while (mp->prev != NULL
16273 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16275 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16276 mp = mp->prev;
16279 return max_mp;
16282 static Mnode *
16283 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16284 HOST_WIDE_INT min_address)
16286 HOST_WIDE_INT offset;
16288 /* The code below assumes these are different. */
16289 gcc_assert (mp != min_mp);
16291 if (min_mp == NULL)
16293 if (min_address > mp->min_address)
16294 mp->min_address = min_address;
16296 else
16298 /* We will adjust this below if it is too loose. */
16299 mp->min_address = min_address;
16301 /* Unlink MP from its current position. Since min_mp is non-null,
16302 mp->next must be non-null. */
16303 mp->next->prev = mp->prev;
16304 if (mp->prev != NULL)
16305 mp->prev->next = mp->next;
16306 else
16307 minipool_vector_head = mp->next;
16309 /* Reinsert it after MIN_MP. */
16310 mp->prev = min_mp;
16311 mp->next = min_mp->next;
16312 min_mp->next = mp;
16313 if (mp->next != NULL)
16314 mp->next->prev = mp;
16315 else
16316 minipool_vector_tail = mp;
16319 min_mp = mp;
16321 offset = 0;
16322 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16324 mp->offset = offset;
16325 if (mp->refcount > 0)
16326 offset += mp->fix_size;
16328 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16329 mp->next->min_address = mp->min_address + mp->fix_size;
16332 return min_mp;
16335 /* Add a constant to the minipool for a backward reference. Returns the
16336 node added or NULL if the constant will not fit in this pool.
16338 Note that the code for insertion for a backwards reference can be
16339 somewhat confusing because the calculated offsets for each fix do
16340 not take into account the size of the pool (which is still under
16341 construction). */
16342 static Mnode *
16343 add_minipool_backward_ref (Mfix *fix)
16345 /* If set, min_mp is the last pool_entry that has a lower constraint
16346 than the one we are trying to add. */
16347 Mnode *min_mp = NULL;
16348 /* This can be negative, since it is only a constraint. */
16349 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16350 Mnode *mp;
16352 /* If we can't reach the current pool from this insn, or if we can't
16353 insert this entry at the end of the pool without pushing other
16354 fixes out of range, then we don't try. This ensures that we
16355 can't fail later on. */
16356 if (min_address >= minipool_barrier->address
16357 || (minipool_vector_tail->min_address + fix->fix_size
16358 >= minipool_barrier->address))
16359 return NULL;
16361 /* Scan the pool to see if a constant with the same value has
16362 already been added. While we are doing this, also note the
16363 location where we must insert the constant if it doesn't already
16364 exist. */
16365 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16367 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16368 && fix->mode == mp->mode
16369 && (!LABEL_P (fix->value)
16370 || (CODE_LABEL_NUMBER (fix->value)
16371 == CODE_LABEL_NUMBER (mp->value)))
16372 && rtx_equal_p (fix->value, mp->value)
16373 /* Check that there is enough slack to move this entry to the
16374 end of the table (this is conservative). */
16375 && (mp->max_address
16376 > (minipool_barrier->address
16377 + minipool_vector_tail->offset
16378 + minipool_vector_tail->fix_size)))
16380 mp->refcount++;
16381 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16384 if (min_mp != NULL)
16385 mp->min_address += fix->fix_size;
16386 else
16388 /* Note the insertion point if necessary. */
16389 if (mp->min_address < min_address)
16391 /* For now, we do not allow the insertion of 8-byte alignment
16392 requiring nodes anywhere but at the start of the pool. */
16393 if (ARM_DOUBLEWORD_ALIGN
16394 && fix->fix_size >= 8 && mp->fix_size < 8)
16395 return NULL;
16396 else
16397 min_mp = mp;
16399 else if (mp->max_address
16400 < minipool_barrier->address + mp->offset + fix->fix_size)
16402 /* Inserting before this entry would push the fix beyond
16403 its maximum address (which can happen if we have
16404 re-located a forwards fix); force the new fix to come
16405 after it. */
16406 if (ARM_DOUBLEWORD_ALIGN
16407 && fix->fix_size >= 8 && mp->fix_size < 8)
16408 return NULL;
16409 else
16411 min_mp = mp;
16412 min_address = mp->min_address + fix->fix_size;
16415 /* Do not insert a non-8-byte aligned quantity before 8-byte
16416 aligned quantities. */
16417 else if (ARM_DOUBLEWORD_ALIGN
16418 && fix->fix_size < 8
16419 && mp->fix_size >= 8)
16421 min_mp = mp;
16422 min_address = mp->min_address + fix->fix_size;
16427 /* We need to create a new entry. */
16428 mp = XNEW (Mnode);
16429 mp->fix_size = fix->fix_size;
16430 mp->mode = fix->mode;
16431 mp->value = fix->value;
16432 mp->refcount = 1;
16433 mp->max_address = minipool_barrier->address + 65536;
16435 mp->min_address = min_address;
16437 if (min_mp == NULL)
16439 mp->prev = NULL;
16440 mp->next = minipool_vector_head;
16442 if (mp->next == NULL)
16444 minipool_vector_tail = mp;
16445 minipool_vector_label = gen_label_rtx ();
16447 else
16448 mp->next->prev = mp;
16450 minipool_vector_head = mp;
16452 else
16454 mp->next = min_mp->next;
16455 mp->prev = min_mp;
16456 min_mp->next = mp;
16458 if (mp->next != NULL)
16459 mp->next->prev = mp;
16460 else
16461 minipool_vector_tail = mp;
16464 /* Save the new entry. */
16465 min_mp = mp;
16467 if (mp->prev)
16468 mp = mp->prev;
16469 else
16470 mp->offset = 0;
16472 /* Scan over the following entries and adjust their offsets. */
16473 while (mp->next != NULL)
16475 if (mp->next->min_address < mp->min_address + mp->fix_size)
16476 mp->next->min_address = mp->min_address + mp->fix_size;
16478 if (mp->refcount)
16479 mp->next->offset = mp->offset + mp->fix_size;
16480 else
16481 mp->next->offset = mp->offset;
16483 mp = mp->next;
16486 return min_mp;
16489 static void
16490 assign_minipool_offsets (Mfix *barrier)
16492 HOST_WIDE_INT offset = 0;
16493 Mnode *mp;
16495 minipool_barrier = barrier;
16497 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16499 mp->offset = offset;
16501 if (mp->refcount > 0)
16502 offset += mp->fix_size;
16506 /* Output the literal table */
16507 static void
16508 dump_minipool (rtx_insn *scan)
16510 Mnode * mp;
16511 Mnode * nmp;
16512 int align64 = 0;
16514 if (ARM_DOUBLEWORD_ALIGN)
16515 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16516 if (mp->refcount > 0 && mp->fix_size >= 8)
16518 align64 = 1;
16519 break;
16522 if (dump_file)
16523 fprintf (dump_file,
16524 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16525 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16527 scan = emit_label_after (gen_label_rtx (), scan);
16528 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16529 scan = emit_label_after (minipool_vector_label, scan);
16531 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16533 if (mp->refcount > 0)
16535 if (dump_file)
16537 fprintf (dump_file,
16538 ";; Offset %u, min %ld, max %ld ",
16539 (unsigned) mp->offset, (unsigned long) mp->min_address,
16540 (unsigned long) mp->max_address);
16541 arm_print_value (dump_file, mp->value);
16542 fputc ('\n', dump_file);
16545 switch (GET_MODE_SIZE (mp->mode))
16547 #ifdef HAVE_consttable_1
16548 case 1:
16549 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16550 break;
16552 #endif
16553 #ifdef HAVE_consttable_2
16554 case 2:
16555 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16556 break;
16558 #endif
16559 #ifdef HAVE_consttable_4
16560 case 4:
16561 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16562 break;
16564 #endif
16565 #ifdef HAVE_consttable_8
16566 case 8:
16567 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16568 break;
16570 #endif
16571 #ifdef HAVE_consttable_16
16572 case 16:
16573 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16574 break;
16576 #endif
16577 default:
16578 gcc_unreachable ();
16582 nmp = mp->next;
16583 free (mp);
16586 minipool_vector_head = minipool_vector_tail = NULL;
16587 scan = emit_insn_after (gen_consttable_end (), scan);
16588 scan = emit_barrier_after (scan);
16591 /* Return the cost of forcibly inserting a barrier after INSN. */
16592 static int
16593 arm_barrier_cost (rtx_insn *insn)
16595 /* Basing the location of the pool on the loop depth is preferable,
16596 but at the moment, the basic block information seems to be
16597 corrupt by this stage of the compilation. */
16598 int base_cost = 50;
16599 rtx_insn *next = next_nonnote_insn (insn);
16601 if (next != NULL && LABEL_P (next))
16602 base_cost -= 20;
16604 switch (GET_CODE (insn))
16606 case CODE_LABEL:
16607 /* It will always be better to place the table before the label, rather
16608 than after it. */
16609 return 50;
16611 case INSN:
16612 case CALL_INSN:
16613 return base_cost;
16615 case JUMP_INSN:
16616 return base_cost - 10;
16618 default:
16619 return base_cost + 10;
16623 /* Find the best place in the insn stream in the range
16624 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16625 Create the barrier by inserting a jump and add a new fix entry for
16626 it. */
16627 static Mfix *
16628 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16630 HOST_WIDE_INT count = 0;
16631 rtx_barrier *barrier;
16632 rtx_insn *from = fix->insn;
16633 /* The instruction after which we will insert the jump. */
16634 rtx_insn *selected = NULL;
16635 int selected_cost;
16636 /* The address at which the jump instruction will be placed. */
16637 HOST_WIDE_INT selected_address;
16638 Mfix * new_fix;
16639 HOST_WIDE_INT max_count = max_address - fix->address;
16640 rtx_code_label *label = gen_label_rtx ();
16642 selected_cost = arm_barrier_cost (from);
16643 selected_address = fix->address;
16645 while (from && count < max_count)
16647 rtx_jump_table_data *tmp;
16648 int new_cost;
16650 /* This code shouldn't have been called if there was a natural barrier
16651 within range. */
16652 gcc_assert (!BARRIER_P (from));
16654 /* Count the length of this insn. This must stay in sync with the
16655 code that pushes minipool fixes. */
16656 if (LABEL_P (from))
16657 count += get_label_padding (from);
16658 else
16659 count += get_attr_length (from);
16661 /* If there is a jump table, add its length. */
16662 if (tablejump_p (from, NULL, &tmp))
16664 count += get_jump_table_size (tmp);
16666 /* Jump tables aren't in a basic block, so base the cost on
16667 the dispatch insn. If we select this location, we will
16668 still put the pool after the table. */
16669 new_cost = arm_barrier_cost (from);
16671 if (count < max_count
16672 && (!selected || new_cost <= selected_cost))
16674 selected = tmp;
16675 selected_cost = new_cost;
16676 selected_address = fix->address + count;
16679 /* Continue after the dispatch table. */
16680 from = NEXT_INSN (tmp);
16681 continue;
16684 new_cost = arm_barrier_cost (from);
16686 if (count < max_count
16687 && (!selected || new_cost <= selected_cost))
16689 selected = from;
16690 selected_cost = new_cost;
16691 selected_address = fix->address + count;
16694 from = NEXT_INSN (from);
16697 /* Make sure that we found a place to insert the jump. */
16698 gcc_assert (selected);
16700 /* Make sure we do not split a call and its corresponding
16701 CALL_ARG_LOCATION note. */
16702 if (CALL_P (selected))
16704 rtx_insn *next = NEXT_INSN (selected);
16705 if (next && NOTE_P (next)
16706 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16707 selected = next;
16710 /* Create a new JUMP_INSN that branches around a barrier. */
16711 from = emit_jump_insn_after (gen_jump (label), selected);
16712 JUMP_LABEL (from) = label;
16713 barrier = emit_barrier_after (from);
16714 emit_label_after (label, barrier);
16716 /* Create a minipool barrier entry for the new barrier. */
16717 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16718 new_fix->insn = barrier;
16719 new_fix->address = selected_address;
16720 new_fix->next = fix->next;
16721 fix->next = new_fix;
16723 return new_fix;
16726 /* Record that there is a natural barrier in the insn stream at
16727 ADDRESS. */
16728 static void
16729 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16731 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16733 fix->insn = insn;
16734 fix->address = address;
16736 fix->next = NULL;
16737 if (minipool_fix_head != NULL)
16738 minipool_fix_tail->next = fix;
16739 else
16740 minipool_fix_head = fix;
16742 minipool_fix_tail = fix;
16745 /* Record INSN, which will need fixing up to load a value from the
16746 minipool. ADDRESS is the offset of the insn since the start of the
16747 function; LOC is a pointer to the part of the insn which requires
16748 fixing; VALUE is the constant that must be loaded, which is of type
16749 MODE. */
16750 static void
16751 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16752 machine_mode mode, rtx value)
16754 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16756 fix->insn = insn;
16757 fix->address = address;
16758 fix->loc = loc;
16759 fix->mode = mode;
16760 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16761 fix->value = value;
16762 fix->forwards = get_attr_pool_range (insn);
16763 fix->backwards = get_attr_neg_pool_range (insn);
16764 fix->minipool = NULL;
16766 /* If an insn doesn't have a range defined for it, then it isn't
16767 expecting to be reworked by this code. Better to stop now than
16768 to generate duff assembly code. */
16769 gcc_assert (fix->forwards || fix->backwards);
16771 /* If an entry requires 8-byte alignment then assume all constant pools
16772 require 4 bytes of padding. Trying to do this later on a per-pool
16773 basis is awkward because existing pool entries have to be modified. */
16774 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16775 minipool_pad = 4;
16777 if (dump_file)
16779 fprintf (dump_file,
16780 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16781 GET_MODE_NAME (mode),
16782 INSN_UID (insn), (unsigned long) address,
16783 -1 * (long)fix->backwards, (long)fix->forwards);
16784 arm_print_value (dump_file, fix->value);
16785 fprintf (dump_file, "\n");
16788 /* Add it to the chain of fixes. */
16789 fix->next = NULL;
16791 if (minipool_fix_head != NULL)
16792 minipool_fix_tail->next = fix;
16793 else
16794 minipool_fix_head = fix;
16796 minipool_fix_tail = fix;
16799 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16800 Returns the number of insns needed, or 99 if we always want to synthesize
16801 the value. */
15802 int
15803 arm_max_const_double_inline_cost ()
16805 /* Let the value get synthesized to avoid the use of literal pools. */
16806 if (arm_disable_literal_pool)
16807 return 99;
16809 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16812 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16813 Returns the number of insns needed, or 99 if we don't know how to
16814 do it. */
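/* For example, the DImode constant 0x0000000100000001 costs 1 insn for
   each 32-bit half (a single MOV of #1), so the function returns 2.  */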
16815 int
16816 arm_const_double_inline_cost (rtx val)
16818 rtx lowpart, highpart;
16819 machine_mode mode;
16821 mode = GET_MODE (val);
16823 if (mode == VOIDmode)
16824 mode = DImode;
16826 gcc_assert (GET_MODE_SIZE (mode) == 8);
16828 lowpart = gen_lowpart (SImode, val);
16829 highpart = gen_highpart_mode (SImode, mode, val);
16831 gcc_assert (CONST_INT_P (lowpart));
16832 gcc_assert (CONST_INT_P (highpart));
16834 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16835 NULL_RTX, NULL_RTX, 0, 0)
16836 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16837 NULL_RTX, NULL_RTX, 0, 0));
16840 /* Cost of loading a SImode constant. */
16841 static inline int
16842 arm_const_inline_cost (enum rtx_code code, rtx val)
16844 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16845 NULL_RTX, NULL_RTX, 1, 0);
16848 /* Return true if it is worthwhile to split a 64-bit constant into two
16849 32-bit operations. This is the case if optimizing for size, or
16850 if we have load delay slots, or if one 32-bit part can be done with
16851 a single data operation. */
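/* For instance, for 0xff00000000000003 the high word 0xff000000 is a
   valid ARM immediate (0xff rotated), so splitting into two 32-bit
   operations is worthwhile even without -Os or load delay slots.  */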
16852 bool
16853 arm_const_double_by_parts (rtx val)
16855 machine_mode mode = GET_MODE (val);
16856 rtx part;
16858 if (optimize_size || arm_ld_sched)
16859 return true;
16861 if (mode == VOIDmode)
16862 mode = DImode;
16864 part = gen_highpart_mode (SImode, mode, val);
16866 gcc_assert (CONST_INT_P (part));
16868 if (const_ok_for_arm (INTVAL (part))
16869 || const_ok_for_arm (~INTVAL (part)))
16870 return true;
16872 part = gen_lowpart (SImode, val);
16874 gcc_assert (CONST_INT_P (part));
16876 if (const_ok_for_arm (INTVAL (part))
16877 || const_ok_for_arm (~INTVAL (part)))
16878 return true;
16880 return false;
16883 /* Return true if it is possible to inline both the high and low parts
16884 of a 64-bit constant into 32-bit data processing instructions. */
16885 bool
16886 arm_const_double_by_immediates (rtx val)
16888 machine_mode mode = GET_MODE (val);
16889 rtx part;
16891 if (mode == VOIDmode)
16892 mode = DImode;
16894 part = gen_highpart_mode (SImode, mode, val);
16896 gcc_assert (CONST_INT_P (part));
16898 if (!const_ok_for_arm (INTVAL (part)))
16899 return false;
16901 part = gen_lowpart (SImode, val);
16903 gcc_assert (CONST_INT_P (part));
16905 if (!const_ok_for_arm (INTVAL (part)))
16906 return false;
16908 return true;
16911 /* Scan INSN and note any of its operands that need fixing.
16912 If DO_PUSHES is false we do not actually push any of the fixups
16913 needed. */
16914 static void
16915 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16917 int opno;
16919 extract_constrain_insn (insn);
16921 if (recog_data.n_alternatives == 0)
16922 return;
16924 /* Fill in recog_op_alt with information about the constraints of
16925 this insn. */
16926 preprocess_constraints (insn);
16928 const operand_alternative *op_alt = which_op_alt ();
16929 for (opno = 0; opno < recog_data.n_operands; opno++)
16931 /* Things we need to fix can only occur in inputs. */
16932 if (recog_data.operand_type[opno] != OP_IN)
16933 continue;
16935 /* If this alternative is a memory reference, then any mention
16936 of constants in this alternative is really to fool reload
16937 into allowing us to accept one there. We need to fix them up
16938 now so that we output the right code. */
16939 if (op_alt[opno].memory_ok)
16941 rtx op = recog_data.operand[opno];
16943 if (CONSTANT_P (op))
16945 if (do_pushes)
16946 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16947 recog_data.operand_mode[opno], op);
16949 else if (MEM_P (op)
16950 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16951 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16953 if (do_pushes)
16955 rtx cop = avoid_constant_pool_reference (op);
16957 /* Casting the address of something to a mode narrower
16958 than a word can cause avoid_constant_pool_reference()
16959 to return the pool reference itself. That's no good to
16960 us here. Let's just hope that we can use the
16961 constant pool value directly. */
16962 if (op == cop)
16963 cop = get_pool_constant (XEXP (op, 0));
16965 push_minipool_fix (insn, address,
16966 recog_data.operand_loc[opno],
16967 recog_data.operand_mode[opno], cop);
16974 return;
16977 /* Rewrite move insn into subtract of 0 if the condition codes will
16978 be useful in next conditional jump insn. */
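/* In RTL terms, a preceding
        (set (reg:SI d) (reg:SI s))
   becomes
        (set (reg:SI d) (minus:SI (reg:SI s) (const_int 0)))
   when the final cbranchsi4_insn in the block tests d or s against zero,
   so the flag-setting subtract can stand in for a separate comparison.  */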
16980 static void
16981 thumb1_reorg (void)
16983 basic_block bb;
16985 FOR_EACH_BB_FN (bb, cfun)
16987 rtx dest, src;
16988 rtx pat, op0, set = NULL;
16989 rtx_insn *prev, *insn = BB_END (bb);
16990 bool insn_clobbered = false;
16992 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16993 insn = PREV_INSN (insn);
16995 /* Find the last cbranchsi4_insn in basic block BB. */
16996 if (insn == BB_HEAD (bb)
16997 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16998 continue;
17000 /* Get the register with which we are comparing. */
17001 pat = PATTERN (insn);
17002 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17004 /* Find the first flag setting insn before INSN in basic block BB. */
17005 gcc_assert (insn != BB_HEAD (bb));
17006 for (prev = PREV_INSN (insn);
17007 (!insn_clobbered
17008 && prev != BB_HEAD (bb)
17009 && (NOTE_P (prev)
17010 || DEBUG_INSN_P (prev)
17011 || ((set = single_set (prev)) != NULL
17012 && get_attr_conds (prev) == CONDS_NOCOND)));
17013 prev = PREV_INSN (prev))
17015 if (reg_set_p (op0, prev))
17016 insn_clobbered = true;
17019 /* Skip if op0 is clobbered by insn other than prev. */
17020 if (insn_clobbered)
17021 continue;
17023 if (!set)
17024 continue;
17026 dest = SET_DEST (set);
17027 src = SET_SRC (set);
17028 if (!low_register_operand (dest, SImode)
17029 || !low_register_operand (src, SImode))
17030 continue;
17032 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17033 in INSN. Both src and dest of the move insn are checked. */
17034 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17036 dest = copy_rtx (dest);
17037 src = copy_rtx (src);
17038 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17039 PATTERN (prev) = gen_rtx_SET (dest, src);
17040 INSN_CODE (prev) = -1;
17041 /* Set test register in INSN to dest. */
17042 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17043 INSN_CODE (insn) = -1;
17048 /* Convert instructions to their cc-clobbering variant if possible, since
17049 that allows us to use smaller encodings. */
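/* For example, outside an IT block the flag-setting form
        adds  r0, r1, r2
   has a 16-bit Thumb-2 encoding, whereas the flag-preserving
        add   r0, r1, r2
   needs a 32-bit encoding, so clobbering the condition codes while they
   are dead shrinks the code.  */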
17051 static void
17052 thumb2_reorg (void)
17054 basic_block bb;
17055 regset_head live;
17057 INIT_REG_SET (&live);
17059 /* We are freeing block_for_insn in the toplev to keep compatibility
17060 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17061 compute_bb_for_insn ();
17062 df_analyze ();
17064 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17066 FOR_EACH_BB_FN (bb, cfun)
17068 if (current_tune->disparage_flag_setting_t16_encodings
17069 && optimize_bb_for_speed_p (bb))
17070 continue;
17072 rtx_insn *insn;
17073 Convert_Action action = SKIP;
17074 Convert_Action action_for_partial_flag_setting
17075 = (current_tune->disparage_partial_flag_setting_t16_encodings
17076 && optimize_bb_for_speed_p (bb))
17077 ? SKIP : CONV;
17079 COPY_REG_SET (&live, DF_LR_OUT (bb));
17080 df_simulate_initialize_backwards (bb, &live);
17081 FOR_BB_INSNS_REVERSE (bb, insn)
17083 if (NONJUMP_INSN_P (insn)
17084 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17085 && GET_CODE (PATTERN (insn)) == SET)
17087 action = SKIP;
17088 rtx pat = PATTERN (insn);
17089 rtx dst = XEXP (pat, 0);
17090 rtx src = XEXP (pat, 1);
17091 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17093 if (UNARY_P (src) || BINARY_P (src))
17094 op0 = XEXP (src, 0);
17096 if (BINARY_P (src))
17097 op1 = XEXP (src, 1);
17099 if (low_register_operand (dst, SImode))
17101 switch (GET_CODE (src))
17103 case PLUS:
17104 /* Adding two registers and storing the result
17105 in the first source is already a 16-bit
17106 operation. */
17107 if (rtx_equal_p (dst, op0)
17108 && register_operand (op1, SImode))
17109 break;
17111 if (low_register_operand (op0, SImode))
17113 /* ADDS <Rd>,<Rn>,<Rm> */
17114 if (low_register_operand (op1, SImode))
17115 action = CONV;
17116 /* ADDS <Rdn>,#<imm8> */
17117 /* SUBS <Rdn>,#<imm8> */
17118 else if (rtx_equal_p (dst, op0)
17119 && CONST_INT_P (op1)
17120 && IN_RANGE (INTVAL (op1), -255, 255))
17121 action = CONV;
17122 /* ADDS <Rd>,<Rn>,#<imm3> */
17123 /* SUBS <Rd>,<Rn>,#<imm3> */
17124 else if (CONST_INT_P (op1)
17125 && IN_RANGE (INTVAL (op1), -7, 7))
17126 action = CONV;
17128 /* ADCS <Rd>, <Rn> */
17129 else if (GET_CODE (XEXP (src, 0)) == PLUS
17130 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17131 && low_register_operand (XEXP (XEXP (src, 0), 1),
17132 SImode)
17133 && COMPARISON_P (op1)
17134 && cc_register (XEXP (op1, 0), VOIDmode)
17135 && maybe_get_arm_condition_code (op1) == ARM_CS
17136 && XEXP (op1, 1) == const0_rtx)
17137 action = CONV;
17138 break;
17140 case MINUS:
17141 /* RSBS <Rd>,<Rn>,#0
17142 Not handled here: see NEG below. */
17143 /* SUBS <Rd>,<Rn>,#<imm3>
17144 SUBS <Rdn>,#<imm8>
17145 Not handled here: see PLUS above. */
17146 /* SUBS <Rd>,<Rn>,<Rm> */
17147 if (low_register_operand (op0, SImode)
17148 && low_register_operand (op1, SImode))
17149 action = CONV;
17150 break;
17152 case MULT:
17153 /* MULS <Rdm>,<Rn>,<Rdm>
17154 As an exception to the rule, this is only used
17155 when optimizing for size since MULS is slow on all
17156 known implementations. We do not even want to use
17157 MULS in cold code, if optimizing for speed, so we
17158 test the global flag here. */
17159 if (!optimize_size)
17160 break;
17161 /* else fall through. */
17162 case AND:
17163 case IOR:
17164 case XOR:
17165 /* ANDS <Rdn>,<Rm> */
17166 if (rtx_equal_p (dst, op0)
17167 && low_register_operand (op1, SImode))
17168 action = action_for_partial_flag_setting;
17169 else if (rtx_equal_p (dst, op1)
17170 && low_register_operand (op0, SImode))
17171 action = action_for_partial_flag_setting == SKIP
17172 ? SKIP : SWAP_CONV;
17173 break;
17175 case ASHIFTRT:
17176 case ASHIFT:
17177 case LSHIFTRT:
17178 /* ASRS <Rdn>,<Rm> */
17179 /* LSRS <Rdn>,<Rm> */
17180 /* LSLS <Rdn>,<Rm> */
17181 if (rtx_equal_p (dst, op0)
17182 && low_register_operand (op1, SImode))
17183 action = action_for_partial_flag_setting;
17184 /* ASRS <Rd>,<Rm>,#<imm5> */
17185 /* LSRS <Rd>,<Rm>,#<imm5> */
17186 /* LSLS <Rd>,<Rm>,#<imm5> */
17187 else if (low_register_operand (op0, SImode)
17188 && CONST_INT_P (op1)
17189 && IN_RANGE (INTVAL (op1), 0, 31))
17190 action = action_for_partial_flag_setting;
17191 break;
17193 case ROTATERT:
17194 /* RORS <Rdn>,<Rm> */
17195 if (rtx_equal_p (dst, op0)
17196 && low_register_operand (op1, SImode))
17197 action = action_for_partial_flag_setting;
17198 break;
17200 case NOT:
17201 /* MVNS <Rd>,<Rm> */
17202 if (low_register_operand (op0, SImode))
17203 action = action_for_partial_flag_setting;
17204 break;
17206 case NEG:
17207 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17208 if (low_register_operand (op0, SImode))
17209 action = CONV;
17210 break;
17212 case CONST_INT:
17213 /* MOVS <Rd>,#<imm8> */
17214 if (CONST_INT_P (src)
17215 && IN_RANGE (INTVAL (src), 0, 255))
17216 action = action_for_partial_flag_setting;
17217 break;
17219 case REG:
17220 /* MOVS and MOV<c> with registers have different
17221 encodings, so are not relevant here. */
17222 break;
17224 default:
17225 break;
17229 if (action != SKIP)
17231 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17232 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17233 rtvec vec;
17235 if (action == SWAP_CONV)
17237 src = copy_rtx (src);
17238 XEXP (src, 0) = op1;
17239 XEXP (src, 1) = op0;
17240 pat = gen_rtx_SET (dst, src);
17241 vec = gen_rtvec (2, pat, clobber);
17243 else /* action == CONV */
17244 vec = gen_rtvec (2, pat, clobber);
17246 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17247 INSN_CODE (insn) = -1;
17251 if (NONDEBUG_INSN_P (insn))
17252 df_simulate_one_insn_backwards (bb, insn, &live);
17256 CLEAR_REG_SET (&live);
17259 /* Gcc puts the pool in the wrong place for ARM, since we can only
17260 load addresses a limited distance around the pc. We do some
17261 special munging to move the constant pool values to the correct
17262 point in the code. */
17263 static void
17264 arm_reorg (void)
17266 rtx_insn *insn;
17267 HOST_WIDE_INT address = 0;
17268 Mfix * fix;
17270 if (TARGET_THUMB1)
17271 thumb1_reorg ();
17272 else if (TARGET_THUMB2)
17273 thumb2_reorg ();
17275 /* Ensure all insns that must be split have been split at this point.
17276 Otherwise, the pool placement code below may compute incorrect
17277 insn lengths. Note that when optimizing, all insns have already
17278 been split at this point. */
17279 if (!optimize)
17280 split_all_insns_noflow ();
17282 minipool_fix_head = minipool_fix_tail = NULL;
17284 /* The first insn must always be a note, or the code below won't
17285 scan it properly. */
17286 insn = get_insns ();
17287 gcc_assert (NOTE_P (insn));
17288 minipool_pad = 0;
17290 /* Scan all the insns and record the operands that will need fixing. */
17291 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17293 if (BARRIER_P (insn))
17294 push_minipool_barrier (insn, address);
17295 else if (INSN_P (insn))
17297 rtx_jump_table_data *table;
17299 note_invalid_constants (insn, address, true);
17300 address += get_attr_length (insn);
17302 /* If the insn is a vector jump, add the size of the table
17303 and skip the table. */
17304 if (tablejump_p (insn, NULL, &table))
17306 address += get_jump_table_size (table);
17307 insn = table;
17310 else if (LABEL_P (insn))
17311 /* Add the worst-case padding due to alignment. We don't add
17312 the _current_ padding because the minipool insertions
17313 themselves might change it. */
17314 address += get_label_padding (insn);
17317 fix = minipool_fix_head;
17319 /* Now scan the fixups and perform the required changes. */
17320 while (fix)
17322 Mfix * ftmp;
17323 Mfix * fdel;
17324 Mfix * last_added_fix;
17325 Mfix * last_barrier = NULL;
17326 Mfix * this_fix;
17328 /* Skip any further barriers before the next fix. */
17329 while (fix && BARRIER_P (fix->insn))
17330 fix = fix->next;
17332 /* No more fixes. */
17333 if (fix == NULL)
17334 break;
17336 last_added_fix = NULL;
17338 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17340 if (BARRIER_P (ftmp->insn))
17342 if (ftmp->address >= minipool_vector_head->max_address)
17343 break;
17345 last_barrier = ftmp;
17347 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17348 break;
17350 last_added_fix = ftmp; /* Keep track of the last fix added. */
17353 /* If we found a barrier, drop back to that; any fixes that we
17354 could have reached but come after the barrier will now go in
17355 the next mini-pool. */
17356 if (last_barrier != NULL)
17358 /* Reduce the refcount for those fixes that won't go into this
17359 pool after all. */
17360 for (fdel = last_barrier->next;
17361 fdel && fdel != ftmp;
17362 fdel = fdel->next)
17364 fdel->minipool->refcount--;
17365 fdel->minipool = NULL;
17368 ftmp = last_barrier;
17370 else
17372 /* ftmp is first fix that we can't fit into this pool and
17373 there no natural barriers that we could use. Insert a
17374 new barrier in the code somewhere between the previous
17375 fix and this one, and arrange to jump around it. */
17376 HOST_WIDE_INT max_address;
17378 /* The last item on the list of fixes must be a barrier, so
17379 we can never run off the end of the list of fixes without
17380 last_barrier being set. */
17381 gcc_assert (ftmp);
17383 max_address = minipool_vector_head->max_address;
17384 /* Check that there isn't another fix that is in range that
17385 we couldn't fit into this pool because the pool was
17386 already too large: we need to put the pool before such an
17387 instruction. The pool itself may come just after the
17388 fix because create_fix_barrier also allows space for a
17389 jump instruction. */
17390 if (ftmp->address < max_address)
17391 max_address = ftmp->address + 1;
17393 last_barrier = create_fix_barrier (last_added_fix, max_address);
17396 assign_minipool_offsets (last_barrier);
17398 while (ftmp)
17400 if (!BARRIER_P (ftmp->insn)
17401 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17402 == NULL))
17403 break;
17405 ftmp = ftmp->next;
17408 /* Scan over the fixes we have identified for this pool, fixing them
17409 up and adding the constants to the pool itself. */
17410 for (this_fix = fix; this_fix && ftmp != this_fix;
17411 this_fix = this_fix->next)
17412 if (!BARRIER_P (this_fix->insn))
17414 rtx addr
17415 = plus_constant (Pmode,
17416 gen_rtx_LABEL_REF (VOIDmode,
17417 minipool_vector_label),
17418 this_fix->minipool->offset);
17419 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17422 dump_minipool (last_barrier->insn);
17423 fix = ftmp;
17426 /* From now on we must synthesize any constants that we can't handle
17427 directly. This can happen if the RTL gets split during final
17428 instruction generation. */
17429 cfun->machine->after_arm_reorg = 1;
17431 /* Free the minipool memory. */
17432 obstack_free (&minipool_obstack, minipool_startobj);
17435 /* Routines to output assembly language. */
17437 /* Return the string representation of the passed-in real value. */
17438 static const char *
17439 fp_const_from_val (REAL_VALUE_TYPE *r)
17441 if (!fp_consts_inited)
17442 init_fp_table ();
17444 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17445 return "0";
17448 /* OPERANDS[0] is the entire list of insns that constitute pop,
17449 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17450 is in the list, UPDATE is true iff the list contains explicit
17451 update of base register. */
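/* For illustration (register choices are arbitrary): with SP as the base
   register and unified syntax this emits e.g. "pop {r4, r5, pc}"; otherwise
   it builds an "ldmfd sp, {...}" form when the base register is SP, or an
   "ldmia r7!, {...}" style form for any other base register.  */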
17452 void
17453 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17454 bool update)
17456 int i;
17457 char pattern[100];
17458 int offset;
17459 const char *conditional;
17460 int num_saves = XVECLEN (operands[0], 0);
17461 unsigned int regno;
17462 unsigned int regno_base = REGNO (operands[1]);
17464 offset = 0;
17465 offset += update ? 1 : 0;
17466 offset += return_pc ? 1 : 0;
17468 /* Is the base register in the list? */
17469 for (i = offset; i < num_saves; i++)
17471 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17472 /* If SP is in the list, then the base register must be SP. */
17473 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17474 /* If base register is in the list, there must be no explicit update. */
17475 if (regno == regno_base)
17476 gcc_assert (!update);
17479 conditional = reverse ? "%?%D0" : "%?%d0";
17480 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17482 /* Output pop (not ldmfd) because it has a shorter encoding. */
17483 gcc_assert (update);
17484 sprintf (pattern, "pop%s\t{", conditional);
17486 else
17488 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17489 It's just a convention; their semantics are identical. */
17490 if (regno_base == SP_REGNUM)
17491 sprintf (pattern, "ldm%sfd\t", conditional);
17492 else if (TARGET_UNIFIED_ASM)
17493 sprintf (pattern, "ldmia%s\t", conditional);
17494 else
17495 sprintf (pattern, "ldm%sia\t", conditional);
17497 strcat (pattern, reg_names[regno_base]);
17498 if (update)
17499 strcat (pattern, "!, {");
17500 else
17501 strcat (pattern, ", {");
17504 /* Output the first destination register. */
17505 strcat (pattern,
17506 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17508 /* Output the rest of the destination registers. */
17509 for (i = offset + 1; i < num_saves; i++)
17511 strcat (pattern, ", ");
17512 strcat (pattern,
17513 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17516 strcat (pattern, "}");
17518 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17519 strcat (pattern, "^");
17521 output_asm_insn (pattern, &cond);
17525 /* Output the assembly for a store multiple. */
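/* Rough illustration (register numbers are arbitrary): storing two double
   registers below SP emits "vpush.64 {d8, d9}", while the same store through
   another base register emits "vstmdb.64 r4!, {d8, d9}".  */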
17527 const char *
17528 vfp_output_vstmd (rtx * operands)
17530 char pattern[100];
17531 int p;
17532 int base;
17533 int i;
17534 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17535 ? XEXP (operands[0], 0)
17536 : XEXP (XEXP (operands[0], 0), 0);
17537 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17539 if (push_p)
17540 strcpy (pattern, "vpush%?.64\t{%P1");
17541 else
17542 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17544 p = strlen (pattern);
17546 gcc_assert (REG_P (operands[1]));
17548 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17549 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17551 p += sprintf (&pattern[p], ", d%d", base + i);
17553 strcpy (&pattern[p], "}");
17555 output_asm_insn (pattern, operands);
17556 return "";
17560 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17561 number of bytes pushed. */
17563 static int
17564 vfp_emit_fstmd (int base_reg, int count)
17566 rtx par;
17567 rtx dwarf;
17568 rtx tmp, reg;
17569 int i;
17571 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17572 register pairs are stored by a store multiple insn. We avoid this
17573 by pushing an extra pair. */
17574 if (count == 2 && !arm_arch6)
17576 if (base_reg == LAST_VFP_REGNUM - 3)
17577 base_reg -= 2;
17578 count++;
17581 /* FSTMD may not store more than 16 doubleword registers at once. Split
17582 larger stores into multiple parts (up to a maximum of two, in
17583 practice). */
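/* For example, a (hypothetical) request to push 20 double registers would be
   split here into one FSTMD of the highest 4 registers followed by one FSTMD
   of the remaining 16.  */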
17584 if (count > 16)
17586 int saved;
17587 /* NOTE: base_reg is an internal register number, so each D register
17588 counts as 2. */
17589 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17590 saved += vfp_emit_fstmd (base_reg, 16);
17591 return saved;
17594 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17595 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17597 reg = gen_rtx_REG (DFmode, base_reg);
17598 base_reg += 2;
17600 XVECEXP (par, 0, 0)
17601 = gen_rtx_SET (gen_frame_mem
17602 (BLKmode,
17603 gen_rtx_PRE_MODIFY (Pmode,
17604 stack_pointer_rtx,
17605 plus_constant
17606 (Pmode, stack_pointer_rtx,
17607 - (count * 8)))
17609 gen_rtx_UNSPEC (BLKmode,
17610 gen_rtvec (1, reg),
17611 UNSPEC_PUSH_MULT));
17613 tmp = gen_rtx_SET (stack_pointer_rtx,
17614 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17615 RTX_FRAME_RELATED_P (tmp) = 1;
17616 XVECEXP (dwarf, 0, 0) = tmp;
17618 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17619 RTX_FRAME_RELATED_P (tmp) = 1;
17620 XVECEXP (dwarf, 0, 1) = tmp;
17622 for (i = 1; i < count; i++)
17624 reg = gen_rtx_REG (DFmode, base_reg);
17625 base_reg += 2;
17626 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17628 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17629 plus_constant (Pmode,
17630 stack_pointer_rtx,
17631 i * 8)),
17632 reg);
17633 RTX_FRAME_RELATED_P (tmp) = 1;
17634 XVECEXP (dwarf, 0, i + 1) = tmp;
17637 par = emit_insn (par);
17638 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17639 RTX_FRAME_RELATED_P (par) = 1;
17641 return count * 8;
17644 /* Emit a call instruction with pattern PAT. ADDR is the address of
17645 the call target. */
17647 void
17648 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17650 rtx insn;
17652 insn = emit_call_insn (pat);
17654 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17655 If the call might use such an entry, add a use of the PIC register
17656 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17657 if (TARGET_VXWORKS_RTP
17658 && flag_pic
17659 && !sibcall
17660 && GET_CODE (addr) == SYMBOL_REF
17661 && (SYMBOL_REF_DECL (addr)
17662 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17663 : !SYMBOL_REF_LOCAL_P (addr)))
17665 require_pic_register ();
17666 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17669 if (TARGET_AAPCS_BASED)
17671 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17672 linker. We need to add an IP clobber to allow setting
17673 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17674 is not needed since it's a fixed register. */
17675 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17676 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17680 /* Output a 'call' insn. */
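/* On pre-ARMv5 targets this typically expands to "mov lr, pc" followed by
   either "bx r0" (when interworking or ARMv4T bx is available) or
   "mov pc, r0"; the register shown is only an example.  */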
17681 const char *
17682 output_call (rtx *operands)
17684 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17686 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17687 if (REGNO (operands[0]) == LR_REGNUM)
17689 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17690 output_asm_insn ("mov%?\t%0, %|lr", operands);
17693 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17695 if (TARGET_INTERWORK || arm_arch4t)
17696 output_asm_insn ("bx%?\t%0", operands);
17697 else
17698 output_asm_insn ("mov%?\t%|pc, %0", operands);
17700 return "";
17703 /* Output a 'call' insn that is a reference in memory. This is
17704 disabled on ARMv5 and later, where we prefer a blx instead, because
17705 otherwise there's a significant performance overhead. */
17706 const char *
17707 output_call_mem (rtx *operands)
17709 gcc_assert (!arm_arch5);
17710 if (TARGET_INTERWORK)
17712 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17713 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17714 output_asm_insn ("bx%?\t%|ip", operands);
17716 else if (regno_use_in (LR_REGNUM, operands[0]))
17718 /* LR is used in the memory address. We load the address in the
17719 first instruction. It's safe to use IP as the target of the
17720 load since the call will kill it anyway. */
17721 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17722 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17723 if (arm_arch4t)
17724 output_asm_insn ("bx%?\t%|ip", operands);
17725 else
17726 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17728 else
17730 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17731 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17734 return "";
17738 /* Output a move from arm registers to arm registers of a long double
17739 OPERANDS[0] is the destination.
17740 OPERANDS[1] is the source. */
17741 const char *
17742 output_mov_long_double_arm_from_arm (rtx *operands)
17744 /* We have to be careful here because the two might overlap. */
17745 int dest_start = REGNO (operands[0]);
17746 int src_start = REGNO (operands[1]);
17747 rtx ops[2];
17748 int i;
17750 if (dest_start < src_start)
17752 for (i = 0; i < 3; i++)
17754 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17755 ops[1] = gen_rtx_REG (SImode, src_start + i);
17756 output_asm_insn ("mov%?\t%0, %1", ops);
17759 else
17761 for (i = 2; i >= 0; i--)
17763 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17764 ops[1] = gen_rtx_REG (SImode, src_start + i);
17765 output_asm_insn ("mov%?\t%0, %1", ops);
17769 return "";
17772 void
17773 arm_emit_movpair (rtx dest, rtx src)
17775 /* If the src is an immediate, simplify it. */
17776 if (CONST_INT_P (src))
17778 HOST_WIDE_INT val = INTVAL (src);
17779 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17780 if ((val >> 16) & 0x0000ffff)
17781 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17782 GEN_INT (16)),
17783 GEN_INT ((val >> 16) & 0x0000ffff));
17784 return;
17786 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17787 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
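/* Worked example (value chosen arbitrarily): for the constant 0x12345678 the
   code above first sets the destination to 0x5678 and then, because the
   upper half is non-zero, writes 0x1234 into bits 16-31 via the ZERO_EXTRACT
   set, giving a movw/movt style pair.  */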
17790 /* Output a move between double words. It must be REG<-MEM
17791 or MEM<-REG. */
17792 const char *
17793 output_move_double (rtx *operands, bool emit, int *count)
17795 enum rtx_code code0 = GET_CODE (operands[0]);
17796 enum rtx_code code1 = GET_CODE (operands[1]);
17797 rtx otherops[3];
17798 if (count)
17799 *count = 1;
17801 /* The only case when this might happen is when
17802 you are looking at the length of a DImode instruction
17803 that has an invalid constant in it. */
17804 if (code0 == REG && code1 != MEM)
17806 gcc_assert (!emit);
17807 *count = 2;
17808 return "";
17811 if (code0 == REG)
17813 unsigned int reg0 = REGNO (operands[0]);
17815 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17817 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17819 switch (GET_CODE (XEXP (operands[1], 0)))
17821 case REG:
17823 if (emit)
17825 if (TARGET_LDRD
17826 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17827 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17828 else
17829 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17831 break;
17833 case PRE_INC:
17834 gcc_assert (TARGET_LDRD);
17835 if (emit)
17836 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17837 break;
17839 case PRE_DEC:
17840 if (emit)
17842 if (TARGET_LDRD)
17843 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17844 else
17845 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17847 break;
17849 case POST_INC:
17850 if (emit)
17852 if (TARGET_LDRD)
17853 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17854 else
17855 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17857 break;
17859 case POST_DEC:
17860 gcc_assert (TARGET_LDRD);
17861 if (emit)
17862 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17863 break;
17865 case PRE_MODIFY:
17866 case POST_MODIFY:
17867 /* Autoincrement addressing modes should never have overlapping
17868 base and destination registers, and overlapping index registers
17869 are already prohibited, so this doesn't need to worry about
17870 fix_cm3_ldrd. */
17871 otherops[0] = operands[0];
17872 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17873 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17875 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17877 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17879 /* Registers overlap so split out the increment. */
17880 if (emit)
17882 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17883 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17885 if (count)
17886 *count = 2;
17888 else
17890 /* Use a single insn if we can.
17891 FIXME: IWMMXT allows offsets larger than ldrd can
17892 handle, fix these up with a pair of ldr. */
17893 if (TARGET_THUMB2
17894 || !CONST_INT_P (otherops[2])
17895 || (INTVAL (otherops[2]) > -256
17896 && INTVAL (otherops[2]) < 256))
17898 if (emit)
17899 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17901 else
17903 if (emit)
17905 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17906 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17908 if (count)
17909 *count = 2;
17914 else
17916 /* Use a single insn if we can.
17917 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17918 fix these up with a pair of ldr. */
17919 if (TARGET_THUMB2
17920 || !CONST_INT_P (otherops[2])
17921 || (INTVAL (otherops[2]) > -256
17922 && INTVAL (otherops[2]) < 256))
17924 if (emit)
17925 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17927 else
17929 if (emit)
17931 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17932 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17934 if (count)
17935 *count = 2;
17938 break;
17940 case LABEL_REF:
17941 case CONST:
17942 /* We might be able to use ldrd %0, %1 here. However the range is
17943 different to ldr/adr, and it is broken on some ARMv7-M
17944 implementations. */
17945 /* Use the second register of the pair to avoid problematic
17946 overlap. */
17947 otherops[1] = operands[1];
17948 if (emit)
17949 output_asm_insn ("adr%?\t%0, %1", otherops);
17950 operands[1] = otherops[0];
17951 if (emit)
17953 if (TARGET_LDRD)
17954 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17955 else
17956 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17959 if (count)
17960 *count = 2;
17961 break;
17963 /* ??? This needs checking for thumb2. */
17964 default:
17965 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17966 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17968 otherops[0] = operands[0];
17969 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17970 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17972 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17974 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17976 switch ((int) INTVAL (otherops[2]))
17978 case -8:
17979 if (emit)
17980 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17981 return "";
17982 case -4:
17983 if (TARGET_THUMB2)
17984 break;
17985 if (emit)
17986 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17987 return "";
17988 case 4:
17989 if (TARGET_THUMB2)
17990 break;
17991 if (emit)
17992 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17993 return "";
17996 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17997 operands[1] = otherops[0];
17998 if (TARGET_LDRD
17999 && (REG_P (otherops[2])
18000 || TARGET_THUMB2
18001 || (CONST_INT_P (otherops[2])
18002 && INTVAL (otherops[2]) > -256
18003 && INTVAL (otherops[2]) < 256)))
18005 if (reg_overlap_mentioned_p (operands[0],
18006 otherops[2]))
18008 /* Swap base and index registers over to
18009 avoid a conflict. */
18010 std::swap (otherops[1], otherops[2]);
18012 /* If both registers conflict, it will usually
18013 have been fixed by a splitter. */
18014 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18015 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18017 if (emit)
18019 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18020 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18022 if (count)
18023 *count = 2;
18025 else
18027 otherops[0] = operands[0];
18028 if (emit)
18029 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18031 return "";
18034 if (CONST_INT_P (otherops[2]))
18036 if (emit)
18038 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18039 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18040 else
18041 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18044 else
18046 if (emit)
18047 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18050 else
18052 if (emit)
18053 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18056 if (count)
18057 *count = 2;
18059 if (TARGET_LDRD)
18060 return "ldr%(d%)\t%0, [%1]";
18062 return "ldm%(ia%)\t%1, %M0";
18064 else
18066 otherops[1] = adjust_address (operands[1], SImode, 4);
18067 /* Take care of overlapping base/data reg. */
18068 if (reg_mentioned_p (operands[0], operands[1]))
18070 if (emit)
18072 output_asm_insn ("ldr%?\t%0, %1", otherops);
18073 output_asm_insn ("ldr%?\t%0, %1", operands);
18075 if (count)
18076 *count = 2;
18079 else
18081 if (emit)
18083 output_asm_insn ("ldr%?\t%0, %1", operands);
18084 output_asm_insn ("ldr%?\t%0, %1", otherops);
18086 if (count)
18087 *count = 2;
18092 else
18094 /* Constraints should ensure this. */
18095 gcc_assert (code0 == MEM && code1 == REG);
18096 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18097 || (TARGET_ARM && TARGET_LDRD));
18099 switch (GET_CODE (XEXP (operands[0], 0)))
18101 case REG:
18102 if (emit)
18104 if (TARGET_LDRD)
18105 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18106 else
18107 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18109 break;
18111 case PRE_INC:
18112 gcc_assert (TARGET_LDRD);
18113 if (emit)
18114 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18115 break;
18117 case PRE_DEC:
18118 if (emit)
18120 if (TARGET_LDRD)
18121 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18122 else
18123 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18125 break;
18127 case POST_INC:
18128 if (emit)
18130 if (TARGET_LDRD)
18131 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18132 else
18133 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18135 break;
18137 case POST_DEC:
18138 gcc_assert (TARGET_LDRD);
18139 if (emit)
18140 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18141 break;
18143 case PRE_MODIFY:
18144 case POST_MODIFY:
18145 otherops[0] = operands[1];
18146 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18147 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18149 /* IWMMXT allows offsets larger than ldrd can handle,
18150 fix these up with a pair of ldr. */
18151 if (!TARGET_THUMB2
18152 && CONST_INT_P (otherops[2])
18153 && (INTVAL(otherops[2]) <= -256
18154 || INTVAL(otherops[2]) >= 256))
18156 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18158 if (emit)
18160 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18161 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18163 if (count)
18164 *count = 2;
18166 else
18168 if (emit)
18170 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18171 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18173 if (count)
18174 *count = 2;
18177 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18179 if (emit)
18180 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18182 else
18184 if (emit)
18185 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18187 break;
18189 case PLUS:
18190 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18191 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18193 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18195 case -8:
18196 if (emit)
18197 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18198 return "";
18200 case -4:
18201 if (TARGET_THUMB2)
18202 break;
18203 if (emit)
18204 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18205 return "";
18207 case 4:
18208 if (TARGET_THUMB2)
18209 break;
18210 if (emit)
18211 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18212 return "";
18215 if (TARGET_LDRD
18216 && (REG_P (otherops[2])
18217 || TARGET_THUMB2
18218 || (CONST_INT_P (otherops[2])
18219 && INTVAL (otherops[2]) > -256
18220 && INTVAL (otherops[2]) < 256)))
18222 otherops[0] = operands[1];
18223 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18224 if (emit)
18225 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18226 return "";
18228 /* Fall through */
18230 default:
18231 otherops[0] = adjust_address (operands[0], SImode, 4);
18232 otherops[1] = operands[1];
18233 if (emit)
18235 output_asm_insn ("str%?\t%1, %0", operands);
18236 output_asm_insn ("str%?\t%H1, %0", otherops);
18238 if (count)
18239 *count = 2;
18243 return "";
18246 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18247 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18249 const char *
18250 output_move_quad (rtx *operands)
18252 if (REG_P (operands[0]))
18254 /* Load, or reg->reg move. */
18256 if (MEM_P (operands[1]))
18258 switch (GET_CODE (XEXP (operands[1], 0)))
18260 case REG:
18261 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18262 break;
18264 case LABEL_REF:
18265 case CONST:
18266 output_asm_insn ("adr%?\t%0, %1", operands);
18267 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18268 break;
18270 default:
18271 gcc_unreachable ();
18274 else
18276 rtx ops[2];
18277 int dest, src, i;
18279 gcc_assert (REG_P (operands[1]));
18281 dest = REGNO (operands[0]);
18282 src = REGNO (operands[1]);
18284 /* This seems pretty dumb, but hopefully GCC won't try to do it
18285 very often. */
18286 if (dest < src)
18287 for (i = 0; i < 4; i++)
18289 ops[0] = gen_rtx_REG (SImode, dest + i);
18290 ops[1] = gen_rtx_REG (SImode, src + i);
18291 output_asm_insn ("mov%?\t%0, %1", ops);
18293 else
18294 for (i = 3; i >= 0; i--)
18296 ops[0] = gen_rtx_REG (SImode, dest + i);
18297 ops[1] = gen_rtx_REG (SImode, src + i);
18298 output_asm_insn ("mov%?\t%0, %1", ops);
18302 else
18304 gcc_assert (MEM_P (operands[0]));
18305 gcc_assert (REG_P (operands[1]));
18306 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18308 switch (GET_CODE (XEXP (operands[0], 0)))
18310 case REG:
18311 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18312 break;
18314 default:
18315 gcc_unreachable ();
18319 return "";
18322 /* Output a VFP load or store instruction. */
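/* For instance (register and address are arbitrary), a DFmode load from
   [r0] comes out as "vldr.64 d8, [r0]", while an SFmode store with
   post-increment uses the "vstmia" form built below.  */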
18324 const char *
18325 output_move_vfp (rtx *operands)
18327 rtx reg, mem, addr, ops[2];
18328 int load = REG_P (operands[0]);
18329 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18330 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18331 const char *templ;
18332 char buff[50];
18333 machine_mode mode;
18335 reg = operands[!load];
18336 mem = operands[load];
18338 mode = GET_MODE (reg);
18340 gcc_assert (REG_P (reg));
18341 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18342 gcc_assert (mode == SFmode
18343 || mode == DFmode
18344 || mode == SImode
18345 || mode == DImode
18346 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18347 gcc_assert (MEM_P (mem));
18349 addr = XEXP (mem, 0);
18351 switch (GET_CODE (addr))
18353 case PRE_DEC:
18354 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18355 ops[0] = XEXP (addr, 0);
18356 ops[1] = reg;
18357 break;
18359 case POST_INC:
18360 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18361 ops[0] = XEXP (addr, 0);
18362 ops[1] = reg;
18363 break;
18365 default:
18366 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18367 ops[0] = reg;
18368 ops[1] = mem;
18369 break;
18372 sprintf (buff, templ,
18373 load ? "ld" : "st",
18374 dp ? "64" : "32",
18375 dp ? "P" : "",
18376 integer_p ? "\t%@ int" : "");
18377 output_asm_insn (buff, ops);
18379 return "";
18382 /* Output a Neon double-word or quad-word load or store, or a load
18383 or store for larger structure modes.
18385 WARNING: The ordering of elements is weird in big-endian mode,
18386 because the EABI requires that vectors stored in memory appear
18387 as though they were stored by a VSTM instruction.
18388 GCC RTL defines element ordering based on in-memory order.
18389 This can be different from the architectural ordering of elements
18390 within a NEON register. The intrinsics defined in arm_neon.h use the
18391 NEON register element ordering, not the GCC RTL element ordering.
18393 For example, the in-memory ordering of a big-endian quadword
18394 vector with 16-bit elements when stored from register pair {d0,d1}
18395 will be (lowest address first, d0[N] is NEON register element N):
18397 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18399 When necessary, quadword registers (dN, dN+1) are moved to ARM
18400 registers from rN in the order:
18402 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18404 So that STM/LDM can be used on vectors in ARM registers, and the
18405 same memory layout will result as if VSTM/VLDM were used.
18407 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18408 possible, which allows use of appropriate alignment tags.
18409 Note that the choice of "64" is independent of the actual vector
18410 element size; this size simply ensures that the behavior is
18411 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18413 Due to limitations of those instructions, use of VST1.64/VLD1.64
18414 is not possible if:
18415 - the address contains PRE_DEC, or
18416 - the mode refers to more than 4 double-word registers
18418 In those cases, it would be possible to replace VSTM/VLDM by a
18419 sequence of instructions; this is not currently implemented since
18420 this is not certain to actually improve performance. */
18422 const char *
18423 output_move_neon (rtx *operands)
18425 rtx reg, mem, addr, ops[2];
18426 int regno, nregs, load = REG_P (operands[0]);
18427 const char *templ;
18428 char buff[50];
18429 machine_mode mode;
18431 reg = operands[!load];
18432 mem = operands[load];
18434 mode = GET_MODE (reg);
18436 gcc_assert (REG_P (reg));
18437 regno = REGNO (reg);
18438 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18439 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18440 || NEON_REGNO_OK_FOR_QUAD (regno));
18441 gcc_assert (VALID_NEON_DREG_MODE (mode)
18442 || VALID_NEON_QREG_MODE (mode)
18443 || VALID_NEON_STRUCT_MODE (mode));
18444 gcc_assert (MEM_P (mem));
18446 addr = XEXP (mem, 0);
18448 /* Strip off const from addresses like (const (plus (...))). */
18449 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18450 addr = XEXP (addr, 0);
18452 switch (GET_CODE (addr))
18454 case POST_INC:
18455 /* We have to use vldm / vstm for too-large modes. */
18456 if (nregs > 4)
18458 templ = "v%smia%%?\t%%0!, %%h1";
18459 ops[0] = XEXP (addr, 0);
18461 else
18463 templ = "v%s1.64\t%%h1, %%A0";
18464 ops[0] = mem;
18466 ops[1] = reg;
18467 break;
18469 case PRE_DEC:
18470 /* We have to use vldm / vstm in this case, since there is no
18471 pre-decrement form of the vld1 / vst1 instructions. */
18472 templ = "v%smdb%%?\t%%0!, %%h1";
18473 ops[0] = XEXP (addr, 0);
18474 ops[1] = reg;
18475 break;
18477 case POST_MODIFY:
18478 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18479 gcc_unreachable ();
18481 case REG:
18482 /* We have to use vldm / vstm for too-large modes. */
18483 if (nregs > 1)
18485 if (nregs > 4)
18486 templ = "v%smia%%?\t%%m0, %%h1";
18487 else
18488 templ = "v%s1.64\t%%h1, %%A0";
18490 ops[0] = mem;
18491 ops[1] = reg;
18492 break;
18494 /* Fall through. */
18495 case LABEL_REF:
18496 case PLUS:
18498 int i;
18499 int overlap = -1;
18500 for (i = 0; i < nregs; i++)
18502 /* We're only using DImode here because it's a convenient size. */
18503 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18504 ops[1] = adjust_address (mem, DImode, 8 * i);
18505 if (reg_overlap_mentioned_p (ops[0], mem))
18507 gcc_assert (overlap == -1);
18508 overlap = i;
18510 else
18512 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18513 output_asm_insn (buff, ops);
18516 if (overlap != -1)
18518 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18519 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18520 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18521 output_asm_insn (buff, ops);
18524 return "";
18527 default:
18528 gcc_unreachable ();
18531 sprintf (buff, templ, load ? "ld" : "st");
18532 output_asm_insn (buff, ops);
18534 return "";
18537 /* Compute and return the length of neon_mov<mode>, where <mode> is
18538 one of VSTRUCT modes: EI, OI, CI or XI. */
18539 int
18540 arm_attr_length_move_neon (rtx_insn *insn)
18542 rtx reg, mem, addr;
18543 int load;
18544 machine_mode mode;
18546 extract_insn_cached (insn);
18548 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18550 mode = GET_MODE (recog_data.operand[0]);
18551 switch (mode)
18553 case EImode:
18554 case OImode:
18555 return 8;
18556 case CImode:
18557 return 12;
18558 case XImode:
18559 return 16;
18560 default:
18561 gcc_unreachable ();
18565 load = REG_P (recog_data.operand[0]);
18566 reg = recog_data.operand[!load];
18567 mem = recog_data.operand[load];
18569 gcc_assert (MEM_P (mem));
18571 mode = GET_MODE (reg);
18572 addr = XEXP (mem, 0);
18574 /* Strip off const from addresses like (const (plus (...))). */
18575 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18576 addr = XEXP (addr, 0);
18578 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18580 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18581 return insns * 4;
18583 else
18584 return 4;
18587 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18588 return zero. */
18590 int
18591 arm_address_offset_is_imm (rtx_insn *insn)
18593 rtx mem, addr;
18595 extract_insn_cached (insn);
18597 if (REG_P (recog_data.operand[0]))
18598 return 0;
18600 mem = recog_data.operand[0];
18602 gcc_assert (MEM_P (mem));
18604 addr = XEXP (mem, 0);
18606 if (REG_P (addr)
18607 || (GET_CODE (addr) == PLUS
18608 && REG_P (XEXP (addr, 0))
18609 && CONST_INT_P (XEXP (addr, 1))))
18610 return 1;
18611 else
18612 return 0;
18615 /* Output an ADD r, s, #n where n may be too big for one instruction.
18616 If the operation is adding zero to the same register, output nothing.
18617 const char *
18618 output_add_immediate (rtx *operands)
18620 HOST_WIDE_INT n = INTVAL (operands[2]);
18622 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18624 if (n < 0)
18625 output_multi_immediate (operands,
18626 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18627 -n);
18628 else
18629 output_multi_immediate (operands,
18630 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18634 return "";
18637 /* Output a multiple immediate operation.
18638 OPERANDS is the vector of operands referred to in the output patterns.
18639 INSTR1 is the output pattern to use for the first constant.
18640 INSTR2 is the output pattern to use for subsequent constants.
18641 IMMED_OP is the index of the constant slot in OPERANDS.
18642 N is the constant value. */
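/* Worked example (operand registers chosen arbitrarily): for N = 0x1001 and
   the "add" patterns passed in by output_add_immediate, the loop below emits
   "add r0, r1, #1" followed by "add r0, r0, #4096", because the constant
   splits into the byte-sized chunks 0x001 and 0x1000.  */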
18643 static const char *
18644 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18645 int immed_op, HOST_WIDE_INT n)
18647 #if HOST_BITS_PER_WIDE_INT > 32
18648 n &= 0xffffffff;
18649 #endif
18651 if (n == 0)
18653 /* Quick and easy output. */
18654 operands[immed_op] = const0_rtx;
18655 output_asm_insn (instr1, operands);
18657 else
18659 int i;
18660 const char * instr = instr1;
18662 /* Note that n is never zero here (which would give no output). */
18663 for (i = 0; i < 32; i += 2)
18665 if (n & (3 << i))
18667 operands[immed_op] = GEN_INT (n & (255 << i));
18668 output_asm_insn (instr, operands);
18669 instr = instr2;
18670 i += 6;
18675 return "";
18678 /* Return the name of a shifter operation. */
18679 static const char *
18680 arm_shift_nmem(enum rtx_code code)
18682 switch (code)
18684 case ASHIFT:
18685 return ARM_LSL_NAME;
18687 case ASHIFTRT:
18688 return "asr";
18690 case LSHIFTRT:
18691 return "lsr";
18693 case ROTATERT:
18694 return "ror";
18696 default:
18697 abort();
18701 /* Return the appropriate ARM instruction for the operation code.
18702 The returned result should not be overwritten. OP is the rtx of the
18703 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18704 was shifted. */
18705 const char *
18706 arithmetic_instr (rtx op, int shift_first_arg)
18708 switch (GET_CODE (op))
18710 case PLUS:
18711 return "add";
18713 case MINUS:
18714 return shift_first_arg ? "rsb" : "sub";
18716 case IOR:
18717 return "orr";
18719 case XOR:
18720 return "eor";
18722 case AND:
18723 return "and";
18725 case ASHIFT:
18726 case ASHIFTRT:
18727 case LSHIFTRT:
18728 case ROTATERT:
18729 return arm_shift_nmem(GET_CODE(op));
18731 default:
18732 gcc_unreachable ();
18736 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18737 for the operation code. The returned result should not be overwritten.
18738 OP is the rtx code of the shift.
18739 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18740 constant shift amount if the shift is by a constant. */
18741 static const char *
18742 shift_op (rtx op, HOST_WIDE_INT *amountp)
18744 const char * mnem;
18745 enum rtx_code code = GET_CODE (op);
18747 switch (code)
18749 case ROTATE:
18750 if (!CONST_INT_P (XEXP (op, 1)))
18752 output_operand_lossage ("invalid shift operand");
18753 return NULL;
18756 code = ROTATERT;
18757 *amountp = 32 - INTVAL (XEXP (op, 1));
18758 mnem = "ror";
18759 break;
18761 case ASHIFT:
18762 case ASHIFTRT:
18763 case LSHIFTRT:
18764 case ROTATERT:
18765 mnem = arm_shift_nmem(code);
18766 if (CONST_INT_P (XEXP (op, 1)))
18768 *amountp = INTVAL (XEXP (op, 1));
18770 else if (REG_P (XEXP (op, 1)))
18772 *amountp = -1;
18773 return mnem;
18775 else
18777 output_operand_lossage ("invalid shift operand");
18778 return NULL;
18780 break;
18782 case MULT:
18783 /* We never have to worry about the amount being other than a
18784 power of 2, since this case can never be reloaded from a reg. */
18785 if (!CONST_INT_P (XEXP (op, 1)))
18787 output_operand_lossage ("invalid shift operand");
18788 return NULL;
18791 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18793 /* Amount must be a power of two. */
18794 if (*amountp & (*amountp - 1))
18796 output_operand_lossage ("invalid shift operand");
18797 return NULL;
18800 *amountp = int_log2 (*amountp);
18801 return ARM_LSL_NAME;
18803 default:
18804 output_operand_lossage ("invalid shift operand");
18805 return NULL;
18808 /* This is not 100% correct, but follows from the desire to merge
18809 multiplication by a power of 2 with the recognizer for a
18810 shift. >=32 is not a valid shift for "lsl", so we must try and
18811 output a shift that produces the correct arithmetical result.
18812 Using lsr #32 is identical except for the fact that the carry bit
18813 is not set correctly if we set the flags; but we never use the
18814 carry bit from such an operation, so we can ignore that. */
18815 if (code == ROTATERT)
18816 /* Rotate is just modulo 32. */
18817 *amountp &= 31;
18818 else if (*amountp != (*amountp & 31))
18820 if (code == ASHIFT)
18821 mnem = "lsr";
18822 *amountp = 32;
18825 /* Shifts of 0 are no-ops. */
18826 if (*amountp == 0)
18827 return NULL;
18829 return mnem;
18832 /* Obtain the shift from the POWER of two. */
18834 static HOST_WIDE_INT
18835 int_log2 (HOST_WIDE_INT power)
18837 HOST_WIDE_INT shift = 0;
18839 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18841 gcc_assert (shift <= 31);
18842 shift++;
18845 return shift;
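/* For instance, int_log2 (8) returns 3, so shift_op above turns a MULT by 8
   into a left shift (ARM_LSL_NAME) with an amount of 3.  */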
18848 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18849 because /bin/as is horribly restrictive. The judgement about
18850 whether or not each character is 'printable' (and can be output as
18851 is) or not (and must be printed with an octal escape) must be made
18852 with reference to the *host* character set -- the situation is
18853 similar to that discussed in the comments above pp_c_char in
18854 c-pretty-print.c. */
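/* Roughly: printable characters are copied through (with '\' and '"'
   escaped), anything else becomes a three-digit octal escape such as \007,
   and a fresh .ascii directive is started whenever the current one reaches
   MAX_ASCII_LEN characters.  */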
18856 #define MAX_ASCII_LEN 51
18858 void
18859 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18861 int i;
18862 int len_so_far = 0;
18864 fputs ("\t.ascii\t\"", stream);
18866 for (i = 0; i < len; i++)
18868 int c = p[i];
18870 if (len_so_far >= MAX_ASCII_LEN)
18872 fputs ("\"\n\t.ascii\t\"", stream);
18873 len_so_far = 0;
18876 if (ISPRINT (c))
18878 if (c == '\\' || c == '\"')
18880 putc ('\\', stream);
18881 len_so_far++;
18883 putc (c, stream);
18884 len_so_far++;
18886 else
18888 fprintf (stream, "\\%03o", c);
18889 len_so_far += 4;
18893 fputs ("\"\n", stream);
18896 /* Whether a register is callee saved or not. This is necessary because high
18897 registers are marked as caller saved when optimizing for size on Thumb-1
18898 targets, despite being callee saved, in order to avoid using them. */
18899 #define callee_saved_reg_p(reg) \
18900 (!call_used_regs[reg] \
18901 || (TARGET_THUMB1 && optimize_size \
18902 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18904 /* Compute the register save mask for registers 0 through 12
18905 inclusive. This code is used by arm_compute_save_reg_mask. */
18907 static unsigned long
18908 arm_compute_save_reg0_reg12_mask (void)
18910 unsigned long func_type = arm_current_func_type ();
18911 unsigned long save_reg_mask = 0;
18912 unsigned int reg;
18914 if (IS_INTERRUPT (func_type))
18916 unsigned int max_reg;
18917 /* Interrupt functions must not corrupt any registers,
18918 even call clobbered ones. If this is a leaf function
18919 we can just examine the registers used by the RTL, but
18920 otherwise we have to assume that whatever function is
18921 called might clobber anything, and so we have to save
18922 all the call-clobbered registers as well. */
18923 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18924 /* FIQ handlers have registers r8 - r12 banked, so
18925 we only need to check r0 - r7.  Normal ISRs only
18926 bank r14 and r15, so we must check up to r12.
18927 r13 is the stack pointer which is always preserved,
18928 so we do not need to consider it here. */
18929 max_reg = 7;
18930 else
18931 max_reg = 12;
18933 for (reg = 0; reg <= max_reg; reg++)
18934 if (df_regs_ever_live_p (reg)
18935 || (! crtl->is_leaf && call_used_regs[reg]))
18936 save_reg_mask |= (1 << reg);
18938 /* Also save the pic base register if necessary. */
18939 if (flag_pic
18940 && !TARGET_SINGLE_PIC_BASE
18941 && arm_pic_register != INVALID_REGNUM
18942 && crtl->uses_pic_offset_table)
18943 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18945 else if (IS_VOLATILE(func_type))
18947 /* For noreturn functions we historically omitted register saves
18948 altogether. However this really messes up debugging. As a
18949 compromise save just the frame pointers. Combined with the link
18950 register saved elsewhere this should be sufficient to get
18951 a backtrace. */
18952 if (frame_pointer_needed)
18953 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18954 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18955 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18956 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18957 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18959 else
18961 /* In the normal case we only need to save those registers
18962 which are call saved and which are used by this function. */
18963 for (reg = 0; reg <= 11; reg++)
18964 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18965 save_reg_mask |= (1 << reg);
18967 /* Handle the frame pointer as a special case. */
18968 if (frame_pointer_needed)
18969 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18971 /* If we aren't loading the PIC register,
18972 don't stack it even though it may be live. */
18973 if (flag_pic
18974 && !TARGET_SINGLE_PIC_BASE
18975 && arm_pic_register != INVALID_REGNUM
18976 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18977 || crtl->uses_pic_offset_table))
18978 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18980 /* The prologue will copy SP into R0, so save it. */
18981 if (IS_STACKALIGN (func_type))
18982 save_reg_mask |= 1;
18985 /* Save registers so the exception handler can modify them. */
18986 if (crtl->calls_eh_return)
18988 unsigned int i;
18990 for (i = 0; ; i++)
18992 reg = EH_RETURN_DATA_REGNO (i);
18993 if (reg == INVALID_REGNUM)
18994 break;
18995 save_reg_mask |= 1 << reg;
18999 return save_reg_mask;
19002 /* Return true if r3 is live at the start of the function. */
19004 static bool
19005 arm_r3_live_at_start_p (void)
19007 /* Just look at cfg info, which is still close enough to correct at this
19008 point. This gives false positives for broken functions that might use
19009 uninitialized data that happens to be allocated in r3, but who cares? */
19010 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19013 /* Compute the number of bytes used to store the static chain register on the
19014 stack, above the stack frame. We need to know this accurately to get the
19015 alignment of the rest of the stack frame correct. */
19017 static int
19018 arm_compute_static_chain_stack_bytes (void)
19020 /* See the defining assertion in arm_expand_prologue. */
19021 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19022 && IS_NESTED (arm_current_func_type ())
19023 && arm_r3_live_at_start_p ()
19024 && crtl->args.pretend_args_size == 0)
19025 return 4;
19027 return 0;
19030 /* Compute a bit mask of which registers need to be
19031 saved on the stack for the current function.
19032 This is used by arm_get_frame_offsets, which may add extra registers. */
19034 static unsigned long
19035 arm_compute_save_reg_mask (void)
19037 unsigned int save_reg_mask = 0;
19038 unsigned long func_type = arm_current_func_type ();
19039 unsigned int reg;
19041 if (IS_NAKED (func_type))
19042 /* This should never really happen. */
19043 return 0;
19045 /* If we are creating a stack frame, then we must save the frame pointer,
19046 IP (which will hold the old stack pointer), LR and the PC. */
19047 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19048 save_reg_mask |=
19049 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19050 | (1 << IP_REGNUM)
19051 | (1 << LR_REGNUM)
19052 | (1 << PC_REGNUM);
19054 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19056 /* Decide if we need to save the link register.
19057 Interrupt routines have their own banked link register,
19058 so they never need to save it.
19059 Otherwise if we do not use the link register we do not need to save
19060 it. If we are pushing other registers onto the stack however, we
19061 can save an instruction in the epilogue by pushing the link register
19062 now and then popping it back into the PC. This incurs extra memory
19063 accesses though, so we only do it when optimizing for size, and only
19064 if we know that we will not need a fancy return sequence. */
19065 if (df_regs_ever_live_p (LR_REGNUM)
19066 || (save_reg_mask
19067 && optimize_size
19068 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19069 && !crtl->tail_call_emit
19070 && !crtl->calls_eh_return))
19071 save_reg_mask |= 1 << LR_REGNUM;
19073 if (cfun->machine->lr_save_eliminated)
19074 save_reg_mask &= ~ (1 << LR_REGNUM);
19076 if (TARGET_REALLY_IWMMXT
19077 && ((bit_count (save_reg_mask)
19078 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19079 arm_compute_static_chain_stack_bytes())
19080 ) % 2) != 0)
19082 /* The total number of registers that are going to be pushed
19083 onto the stack is odd. We need to ensure that the stack
19084 is 64-bit aligned before we start to save iWMMXt registers,
19085 and also before we start to create locals. (A local variable
19086 might be a double or long long which we will load/store using
19087 an iWMMXt instruction). Therefore we need to push another
19088 ARM register, so that the stack will be 64-bit aligned. We
19089 try to avoid using the arg registers (r0 - r3) as they might be
19090 used to pass values in a tail call. */
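/* Illustration (register numbers arbitrary, assuming no pretend args or
   static chain bytes): if only r4, r5 and r6 were in the mask, the count
   would be odd, and the loop below would pick r7 (the first register from
   r4 upwards that is not already saved) as the extra padding register.  */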
19091 for (reg = 4; reg <= 12; reg++)
19092 if ((save_reg_mask & (1 << reg)) == 0)
19093 break;
19095 if (reg <= 12)
19096 save_reg_mask |= (1 << reg);
19097 else
19099 cfun->machine->sibcall_blocked = 1;
19100 save_reg_mask |= (1 << 3);
19104 /* We may need to push an additional register for use initializing the
19105 PIC base register. */
19106 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19107 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19109 reg = thumb_find_work_register (1 << 4);
19110 if (!call_used_regs[reg])
19111 save_reg_mask |= (1 << reg);
19114 return save_reg_mask;
19118 /* Compute a bit mask of which registers need to be
19119 saved on the stack for the current function. */
19120 static unsigned long
19121 thumb1_compute_save_reg_mask (void)
19123 unsigned long mask;
19124 unsigned reg;
19126 mask = 0;
19127 for (reg = 0; reg < 12; reg ++)
19128 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19129 mask |= 1 << reg;
19131 if (flag_pic
19132 && !TARGET_SINGLE_PIC_BASE
19133 && arm_pic_register != INVALID_REGNUM
19134 && crtl->uses_pic_offset_table)
19135 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19137 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19138 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19139 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19141 /* LR will also be pushed if any lo regs are pushed. */
19142 if (mask & 0xff || thumb_force_lr_save ())
19143 mask |= (1 << LR_REGNUM);
19145 /* Make sure we have a low work register if we need one.
19146 We will need one if we are going to push a high register,
19147 but we are not currently intending to push a low register. */
19148 if ((mask & 0xff) == 0
19149 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19151 /* Use thumb_find_work_register to choose which register
19152 we will use. If the register is live then we will
19153 have to push it. Use LAST_LO_REGNUM as our fallback
19154 choice for the register to select. */
19155 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19156 /* Make sure the register returned by thumb_find_work_register is
19157 not part of the return value. */
19158 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19159 reg = LAST_LO_REGNUM;
19161 if (callee_saved_reg_p (reg))
19162 mask |= 1 << reg;
19165 /* The 504 below is 8 bytes less than 512 because there are two possible
19166 alignment words. We can't tell here if they will be present or not so we
19167 have to play it safe and assume that they are. */
19168 if ((CALLER_INTERWORKING_SLOT_SIZE +
19169 ROUND_UP_WORD (get_frame_size ()) +
19170 crtl->outgoing_args_size) >= 504)
19172 /* This is the same as the code in thumb1_expand_prologue() which
19173 determines which register to use for stack decrement. */
19174 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19175 if (mask & (1 << reg))
19176 break;
19178 if (reg > LAST_LO_REGNUM)
19180 /* Make sure we have a register available for stack decrement. */
19181 mask |= 1 << LAST_LO_REGNUM;
19185 return mask;
19189 /* Return the number of bytes required to save VFP registers. */
19190 static int
19191 arm_get_vfp_saved_size (void)
19193 unsigned int regno;
19194 int count;
19195 int saved;
19197 saved = 0;
19198 /* Space for saved VFP registers. */
19199 if (TARGET_HARD_FLOAT && TARGET_VFP)
19201 count = 0;
19202 for (regno = FIRST_VFP_REGNUM;
19203 regno < LAST_VFP_REGNUM;
19204 regno += 2)
19206 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19207 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19209 if (count > 0)
19211 /* Workaround ARM10 VFPr1 bug. */
19212 if (count == 2 && !arm_arch6)
19213 count++;
19214 saved += count * 8;
19216 count = 0;
19218 else
19219 count++;
19221 if (count > 0)
19223 if (count == 2 && !arm_arch6)
19224 count++;
19225 saved += count * 8;
19228 return saved;
19232 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19233 everything bar the final return instruction. If simple_return is true,
19234 then do not output the epilogue, because it has already been emitted in RTL. */
19235 const char *
19236 output_return_instruction (rtx operand, bool really_return, bool reverse,
19237 bool simple_return)
19239 char conditional[10];
19240 char instr[100];
19241 unsigned reg;
19242 unsigned long live_regs_mask;
19243 unsigned long func_type;
19244 arm_stack_offsets *offsets;
19246 func_type = arm_current_func_type ();
19248 if (IS_NAKED (func_type))
19249 return "";
19251 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19253 /* If this function was declared non-returning, and we have
19254 found a tail call, then we have to trust that the called
19255 function won't return. */
19256 if (really_return)
19258 rtx ops[2];
19260 /* Otherwise, trap an attempted return by aborting. */
19261 ops[0] = operand;
19262 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19263 : "abort");
19264 assemble_external_libcall (ops[1]);
19265 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19268 return "";
19271 gcc_assert (!cfun->calls_alloca || really_return);
19273 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19275 cfun->machine->return_used_this_function = 1;
19277 offsets = arm_get_frame_offsets ();
19278 live_regs_mask = offsets->saved_regs_mask;
19280 if (!simple_return && live_regs_mask)
19282 const char * return_reg;
19284 /* If we do not have any special requirements for function exit
19285 (e.g. interworking) then we can load the return address
19286 directly into the PC. Otherwise we must load it into LR. */
19287 if (really_return
19288 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19289 return_reg = reg_names[PC_REGNUM];
19290 else
19291 return_reg = reg_names[LR_REGNUM];
19293 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19295 /* There are three possible reasons for the IP register
19296 being saved. 1) a stack frame was created, in which case
19297 IP contains the old stack pointer, or 2) an ISR routine
19298 corrupted it, or 3) it was saved to align the stack on
19299 iWMMXt. In case 1, restore IP into SP, otherwise just
19300 restore IP. */
19301 if (frame_pointer_needed)
19303 live_regs_mask &= ~ (1 << IP_REGNUM);
19304 live_regs_mask |= (1 << SP_REGNUM);
19306 else
19307 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19310 /* On some ARM architectures it is faster to use LDR rather than
19311 LDM to load a single register. On other architectures, the
19312 cost is the same. In 26 bit mode, or for exception handlers,
19313 we have to use LDM to load the PC so that the CPSR is also
19314 restored. */
19315 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19316 if (live_regs_mask == (1U << reg))
19317 break;
19319 if (reg <= LAST_ARM_REGNUM
19320 && (reg != LR_REGNUM
19321 || ! really_return
19322 || ! IS_INTERRUPT (func_type)))
19324 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19325 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19327 else
19329 char *p;
19330 int first = 1;
19332 /* Generate the load multiple instruction to restore the
19333 registers. Note we can get here, even if
19334 frame_pointer_needed is true, but only if sp already
19335 points to the base of the saved core registers. */
19336 if (live_regs_mask & (1 << SP_REGNUM))
19338 unsigned HOST_WIDE_INT stack_adjust;
19340 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19341 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19343 if (stack_adjust && arm_arch5 && TARGET_ARM)
19344 if (TARGET_UNIFIED_ASM)
19345 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19346 else
19347 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19348 else
19350 /* If we can't use ldmib (SA110 bug),
19351 then try to pop r3 instead. */
19352 if (stack_adjust)
19353 live_regs_mask |= 1 << 3;
19355 if (TARGET_UNIFIED_ASM)
19356 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19357 else
19358 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19361 else
19362 if (TARGET_UNIFIED_ASM)
19363 sprintf (instr, "pop%s\t{", conditional);
19364 else
19365 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19367 p = instr + strlen (instr);
19369 for (reg = 0; reg <= SP_REGNUM; reg++)
19370 if (live_regs_mask & (1 << reg))
19372 int l = strlen (reg_names[reg]);
19374 if (first)
19375 first = 0;
19376 else
19378 memcpy (p, ", ", 2);
19379 p += 2;
19382 memcpy (p, "%|", 2);
19383 memcpy (p + 2, reg_names[reg], l);
19384 p += l + 2;
19387 if (live_regs_mask & (1 << LR_REGNUM))
19389 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19390 /* If returning from an interrupt, restore the CPSR. */
19391 if (IS_INTERRUPT (func_type))
19392 strcat (p, "^");
19394 else
19395 strcpy (p, "}");
19398 output_asm_insn (instr, & operand);
19400 /* See if we need to generate an extra instruction to
19401 perform the actual function return. */
19402 if (really_return
19403 && func_type != ARM_FT_INTERWORKED
19404 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19406 /* The return has already been handled
19407 by loading the LR into the PC. */
19408 return "";
19412 if (really_return)
19414 switch ((int) ARM_FUNC_TYPE (func_type))
19416 case ARM_FT_ISR:
19417 case ARM_FT_FIQ:
19418 /* ??? This is wrong for unified assembly syntax. */
19419 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19420 break;
19422 case ARM_FT_INTERWORKED:
19423 sprintf (instr, "bx%s\t%%|lr", conditional);
19424 break;
19426 case ARM_FT_EXCEPTION:
19427 /* ??? This is wrong for unified assembly syntax. */
19428 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19429 break;
19431 default:
19432 /* Use bx if it's available. */
19433 if (arm_arch5 || arm_arch4t)
19434 sprintf (instr, "bx%s\t%%|lr", conditional);
19435 else
19436 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19437 break;
19440 output_asm_insn (instr, & operand);
19443 return "";
19446 /* Write the function name into the code section, directly preceding
19447 the function prologue.
19449 Code will be output similar to this:
19450 t0
19451 .ascii "arm_poke_function_name", 0
19452 .align
19453 t1
19454 .word 0xff000000 + (t1 - t0)
19455 arm_poke_function_name
19456 mov ip, sp
19457 stmfd sp!, {fp, ip, lr, pc}
19458 sub fp, ip, #4
19460 When performing a stack backtrace, code can inspect the value
19461 of 'pc' stored at 'fp' + 0. If the trace function then looks
19462 at location pc - 12 and the top 8 bits are set, then we know
19463 that there is a function name embedded immediately preceding this
19464 location, and that its length is ((pc[-3]) & ~0xff000000).
19466 We assume that pc is declared as a pointer to an unsigned long.
19468 It is of no benefit to output the function name if we are assembling
19469 a leaf function. These function types will not contain a stack
19470 backtrace structure, therefore it is not possible to determine the
19471 function name. */
19472 void
19473 arm_poke_function_name (FILE *stream, const char *name)
19475 unsigned long alignlength;
19476 unsigned long length;
19477 rtx x;
19479 length = strlen (name) + 1;
19480 alignlength = ROUND_UP_WORD (length);
19482 ASM_OUTPUT_ASCII (stream, name, length);
19483 ASM_OUTPUT_ALIGN (stream, 2);
19484 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19485 assemble_aligned_integer (UNITS_PER_WORD, x);
19488 /* Place some comments into the assembler stream
19489 describing the current function. */
19490 static void
19491 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19493 unsigned long func_type;
19495 /* ??? Do we want to print some of the below anyway? */
19496 if (TARGET_THUMB1)
19497 return;
19499 /* Sanity check. */
19500 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19502 func_type = arm_current_func_type ();
19504 switch ((int) ARM_FUNC_TYPE (func_type))
19506 default:
19507 case ARM_FT_NORMAL:
19508 break;
19509 case ARM_FT_INTERWORKED:
19510 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19511 break;
19512 case ARM_FT_ISR:
19513 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19514 break;
19515 case ARM_FT_FIQ:
19516 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19517 break;
19518 case ARM_FT_EXCEPTION:
19519 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19520 break;
19523 if (IS_NAKED (func_type))
19524 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19526 if (IS_VOLATILE (func_type))
19527 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19529 if (IS_NESTED (func_type))
19530 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19531 if (IS_STACKALIGN (func_type))
19532 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19534 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19535 crtl->args.size,
19536 crtl->args.pretend_args_size, frame_size);
19538 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19539 frame_pointer_needed,
19540 cfun->machine->uses_anonymous_args);
19542 if (cfun->machine->lr_save_eliminated)
19543 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19545 if (crtl->calls_eh_return)
19546 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19550 static void
19551 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19552 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19554 arm_stack_offsets *offsets;
19556 if (TARGET_THUMB1)
19558 int regno;
19560 /* Emit any call-via-reg trampolines that are needed for v4t support
19561 of call_reg and call_value_reg type insns. */
19562 for (regno = 0; regno < LR_REGNUM; regno++)
19564 rtx label = cfun->machine->call_via[regno];
19566 if (label != NULL)
19568 switch_to_section (function_section (current_function_decl));
19569 targetm.asm_out.internal_label (asm_out_file, "L",
19570 CODE_LABEL_NUMBER (label));
19571 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19575 /* ??? Probably not safe to set this here, since it assumes that a
19576 function will be emitted as assembly immediately after we generate
19577 RTL for it. This does not happen for inline functions. */
19578 cfun->machine->return_used_this_function = 0;
19580 else /* TARGET_32BIT */
19582 /* We need to take into account any stack-frame rounding. */
19583 offsets = arm_get_frame_offsets ();
19585 gcc_assert (!use_return_insn (FALSE, NULL)
19586 || (cfun->machine->return_used_this_function != 0)
19587 || offsets->saved_regs == offsets->outgoing_args
19588 || frame_pointer_needed);
19592 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19593 STR and STRD. If an even number of registers is being pushed, an
19594 STRD pattern is created for each register pair. If an
19595 odd number of registers is pushed, emit an initial STR followed by
19596 as many STRD instructions as are needed. This works best when the
19597 stack is initially 64-bit aligned (the normal case), since it
19598 ensures that each STRD is also 64-bit aligned. */
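/* A hypothetical example (not taken from the sources): for a saved_regs_mask
   covering {r4, r5, r6} the expected sequence is roughly

     str   r4, [sp, #-12]!       @ odd count: single store allocates 12 bytes
     strd  r5, r6, [sp, #4]      @ remaining pair, 64-bit aligned

   whereas an even count such as {r4, r5, r6, r7} would instead start with an
   STRD whose first store performs the whole stack allocation.  */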
19599 static void
19600 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19602 int num_regs = 0;
19603 int i;
19604 int regno;
19605 rtx par = NULL_RTX;
19606 rtx dwarf = NULL_RTX;
19607 rtx tmp;
19608 bool first = true;
19610 num_regs = bit_count (saved_regs_mask);
19612 /* Must be at least one register to save, and can't save SP or PC. */
19613 gcc_assert (num_regs > 0 && num_regs <= 14);
19614 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19615 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19617 /* Create sequence for DWARF info. All the frame-related data for
19618 debugging is held in this wrapper. */
19619 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19621 /* Describe the stack adjustment. */
19622 tmp = gen_rtx_SET (stack_pointer_rtx,
19623 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19624 RTX_FRAME_RELATED_P (tmp) = 1;
19625 XVECEXP (dwarf, 0, 0) = tmp;
19627 /* Find the first register. */
19628 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19631 i = 0;
19633 /* If there's an odd number of registers to push, start off by
19634 pushing a single register. This ensures that subsequent strd
19635 operations are dword aligned (assuming that SP was originally
19636 64-bit aligned). */
19637 if ((num_regs & 1) != 0)
19639 rtx reg, mem, insn;
19641 reg = gen_rtx_REG (SImode, regno);
19642 if (num_regs == 1)
19643 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19644 stack_pointer_rtx));
19645 else
19646 mem = gen_frame_mem (Pmode,
19647 gen_rtx_PRE_MODIFY
19648 (Pmode, stack_pointer_rtx,
19649 plus_constant (Pmode, stack_pointer_rtx,
19650 -4 * num_regs)));
19652 tmp = gen_rtx_SET (mem, reg);
19653 RTX_FRAME_RELATED_P (tmp) = 1;
19654 insn = emit_insn (tmp);
19655 RTX_FRAME_RELATED_P (insn) = 1;
19656 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19657 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19658 RTX_FRAME_RELATED_P (tmp) = 1;
19659 i++;
19660 regno++;
19661 XVECEXP (dwarf, 0, i) = tmp;
19662 first = false;
19665 while (i < num_regs)
19666 if (saved_regs_mask & (1 << regno))
19668 rtx reg1, reg2, mem1, mem2;
19669 rtx tmp0, tmp1, tmp2;
19670 int regno2;
19672 /* Find the register to pair with this one. */
19673 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19674 regno2++)
19677 reg1 = gen_rtx_REG (SImode, regno);
19678 reg2 = gen_rtx_REG (SImode, regno2);
19680 if (first)
19682 rtx insn;
19684 first = false;
19685 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19686 stack_pointer_rtx,
19687 -4 * num_regs));
19688 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19689 stack_pointer_rtx,
19690 -4 * (num_regs - 1)));
19691 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19692 plus_constant (Pmode, stack_pointer_rtx,
19693 -4 * (num_regs)));
19694 tmp1 = gen_rtx_SET (mem1, reg1);
19695 tmp2 = gen_rtx_SET (mem2, reg2);
19696 RTX_FRAME_RELATED_P (tmp0) = 1;
19697 RTX_FRAME_RELATED_P (tmp1) = 1;
19698 RTX_FRAME_RELATED_P (tmp2) = 1;
19699 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19700 XVECEXP (par, 0, 0) = tmp0;
19701 XVECEXP (par, 0, 1) = tmp1;
19702 XVECEXP (par, 0, 2) = tmp2;
19703 insn = emit_insn (par);
19704 RTX_FRAME_RELATED_P (insn) = 1;
19705 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19707 else
19709 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19710 stack_pointer_rtx,
19711 4 * i));
19712 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19713 stack_pointer_rtx,
19714 4 * (i + 1)));
19715 tmp1 = gen_rtx_SET (mem1, reg1);
19716 tmp2 = gen_rtx_SET (mem2, reg2);
19717 RTX_FRAME_RELATED_P (tmp1) = 1;
19718 RTX_FRAME_RELATED_P (tmp2) = 1;
19719 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19720 XVECEXP (par, 0, 0) = tmp1;
19721 XVECEXP (par, 0, 1) = tmp2;
19722 emit_insn (par);
19725 /* Create unwind information. This is an approximation. */
19726 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19727 plus_constant (Pmode,
19728 stack_pointer_rtx,
19729 4 * i)),
19730 reg1);
19731 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19732 plus_constant (Pmode,
19733 stack_pointer_rtx,
19734 4 * (i + 1))),
19735 reg2);
19737 RTX_FRAME_RELATED_P (tmp1) = 1;
19738 RTX_FRAME_RELATED_P (tmp2) = 1;
19739 XVECEXP (dwarf, 0, i + 1) = tmp1;
19740 XVECEXP (dwarf, 0, i + 2) = tmp2;
19741 i += 2;
19742 regno = regno2 + 1;
19744 else
19745 regno++;
19747 return;
19750 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19751 whenever possible, otherwise it emits single-word stores. The first store
19752 also allocates stack space for all saved registers, using pre-indexed
19753 addressing with writeback. All other stores use offset addressing. If no STRD
19754 can be emitted, this function emits a sequence of single-word stores,
19755 and not an STM as before, because single-word stores give the scheduler more
19756 freedom and can be turned into an STM by peephole optimizations. */
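/* For illustration (an assumption based on the code below, not taken from
   the sources): with saved_regs_mask covering {r4, r5, r7} this emits
   approximately

     strd  r4, r5, [sp, #-12]!   @ first store allocates all 12 bytes
     str   r7, [sp, #8]          @ r7 has no partner: single-word store

   together with the DWARF annotations collected in the `dwarf' sequence.  */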
19757 static void
19758 arm_emit_strd_push (unsigned long saved_regs_mask)
19760 int num_regs = 0;
19761 int i, j, dwarf_index = 0;
19762 int offset = 0;
19763 rtx dwarf = NULL_RTX;
19764 rtx insn = NULL_RTX;
19765 rtx tmp, mem;
19767 /* TODO: More efficient code could be emitted by changing the
19768 layout, e.g., first push all pairs that can use STRD to keep the
19769 stack aligned, and then push all other registers. */
19770 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19771 if (saved_regs_mask & (1 << i))
19772 num_regs++;
19774 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19775 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19776 gcc_assert (num_regs > 0);
19778 /* Create sequence for DWARF info. */
19779 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19781 /* For dwarf info, we generate explicit stack update. */
19782 tmp = gen_rtx_SET (stack_pointer_rtx,
19783 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19784 RTX_FRAME_RELATED_P (tmp) = 1;
19785 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19787 /* Save registers. */
19788 offset = - 4 * num_regs;
19789 j = 0;
19790 while (j <= LAST_ARM_REGNUM)
19791 if (saved_regs_mask & (1 << j))
19793 if ((j % 2 == 0)
19794 && (saved_regs_mask & (1 << (j + 1))))
19796 /* The current register and the next register form a register pair
19797 for which STRD can be generated. */
19798 if (offset < 0)
19800 /* Allocate stack space for all saved registers. */
19801 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19802 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19803 mem = gen_frame_mem (DImode, tmp);
19804 offset = 0;
19806 else if (offset > 0)
19807 mem = gen_frame_mem (DImode,
19808 plus_constant (Pmode,
19809 stack_pointer_rtx,
19810 offset));
19811 else
19812 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19814 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19815 RTX_FRAME_RELATED_P (tmp) = 1;
19816 tmp = emit_insn (tmp);
19818 /* Record the first store insn. */
19819 if (dwarf_index == 1)
19820 insn = tmp;
19822 /* Generate dwarf info. */
19823 mem = gen_frame_mem (SImode,
19824 plus_constant (Pmode,
19825 stack_pointer_rtx,
19826 offset));
19827 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19828 RTX_FRAME_RELATED_P (tmp) = 1;
19829 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19831 mem = gen_frame_mem (SImode,
19832 plus_constant (Pmode,
19833 stack_pointer_rtx,
19834 offset + 4));
19835 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19839 offset += 8;
19840 j += 2;
19842 else
19844 /* Emit a single word store. */
19845 if (offset < 0)
19847 /* Allocate stack space for all saved registers. */
19848 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19849 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19850 mem = gen_frame_mem (SImode, tmp);
19851 offset = 0;
19853 else if (offset > 0)
19854 mem = gen_frame_mem (SImode,
19855 plus_constant (Pmode,
19856 stack_pointer_rtx,
19857 offset));
19858 else
19859 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19861 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19862 RTX_FRAME_RELATED_P (tmp) = 1;
19863 tmp = emit_insn (tmp);
19865 /* Record the first store insn. */
19866 if (dwarf_index == 1)
19867 insn = tmp;
19869 /* Generate dwarf info. */
19870 mem = gen_frame_mem (SImode,
19871 plus_constant(Pmode,
19872 stack_pointer_rtx,
19873 offset));
19874 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19875 RTX_FRAME_RELATED_P (tmp) = 1;
19876 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19878 offset += 4;
19879 j += 1;
19882 else
19883 j++;
19885 /* Attach dwarf info to the first insn we generate. */
19886 gcc_assert (insn != NULL_RTX);
19887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19888 RTX_FRAME_RELATED_P (insn) = 1;
19891 /* Generate and emit an insn that we will recognize as a push_multi.
19892 Unfortunately, since this insn does not reflect very well the actual
19893 semantics of the operation, we need to annotate the insn for the benefit
19894 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19895 MASK for registers that should be annotated for DWARF2 frame unwind
19896 information. */
19897 static rtx
19898 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19900 int num_regs = 0;
19901 int num_dwarf_regs = 0;
19902 int i, j;
19903 rtx par;
19904 rtx dwarf;
19905 int dwarf_par_index;
19906 rtx tmp, reg;
19908 /* We don't record the PC in the dwarf frame information. */
19909 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19911 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19913 if (mask & (1 << i))
19914 num_regs++;
19915 if (dwarf_regs_mask & (1 << i))
19916 num_dwarf_regs++;
19919 gcc_assert (num_regs && num_regs <= 16);
19920 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19922 /* For the body of the insn we are going to generate an UNSPEC in
19923 parallel with several USEs. This allows the insn to be recognized
19924 by the push_multi pattern in the arm.md file.
19926 The body of the insn looks something like this:
19928 (parallel [
19929 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19930 (const_int:SI <num>)))
19931 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19932 (use (reg:SI XX))
19933 (use (reg:SI YY))
19937 For the frame note however, we try to be more explicit and actually
19938 show each register being stored into the stack frame, plus a (single)
19939 decrement of the stack pointer. We do it this way in order to be
19940 friendly to the stack unwinding code, which only wants to see a single
19941 stack decrement per instruction. The RTL we generate for the note looks
19942 something like this:
19944 (sequence [
19945 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19946 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19947 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19948 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19952 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19953 instead we'd have a parallel expression detailing all
19954 the stores to the various memory addresses so that debug
19955 information is more up-to-date. Remember however while writing
19956 this to take care of the constraints with the push instruction.
19958 Note also that this has to be taken care of for the VFP registers.
19960 For more see PR43399. */
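/* Concretely (a hedged example, not from the sources): for MASK covering
   {r4, r5, r6, lr} the emitted insn assembles to a single

     push  {r4, r5, r6, lr}      @ stmfd sp!, {r4, r5, r6, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the same effect
   as one 16-byte stack decrement plus four SImode stores at sp + 0, sp + 4,
   sp + 8 and sp + 12, which is the form the unwinder wants to see.  */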
19962 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19963 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19964 dwarf_par_index = 1;
19966 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19968 if (mask & (1 << i))
19970 reg = gen_rtx_REG (SImode, i);
19972 XVECEXP (par, 0, 0)
19973 = gen_rtx_SET (gen_frame_mem
19974 (BLKmode,
19975 gen_rtx_PRE_MODIFY (Pmode,
19976 stack_pointer_rtx,
19977 plus_constant
19978 (Pmode, stack_pointer_rtx,
19979 -4 * num_regs))
19981 gen_rtx_UNSPEC (BLKmode,
19982 gen_rtvec (1, reg),
19983 UNSPEC_PUSH_MULT));
19985 if (dwarf_regs_mask & (1 << i))
19987 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
19988 reg);
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19993 break;
19997 for (j = 1, i++; j < num_regs; i++)
19999 if (mask & (1 << i))
20001 reg = gen_rtx_REG (SImode, i);
20003 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20005 if (dwarf_regs_mask & (1 << i))
20008 = gen_rtx_SET (gen_frame_mem
20009 (SImode,
20010 plus_constant (Pmode, stack_pointer_rtx,
20011 4 * j)),
20012 reg);
20013 RTX_FRAME_RELATED_P (tmp) = 1;
20014 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20017 j++;
20021 par = emit_insn (par);
20023 tmp = gen_rtx_SET (stack_pointer_rtx,
20024 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20025 RTX_FRAME_RELATED_P (tmp) = 1;
20026 XVECEXP (dwarf, 0, 0) = tmp;
20028 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20030 return par;
20033 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20034 SIZE is the offset to be adjusted.
20035 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20036 static void
20037 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20039 rtx dwarf;
20041 RTX_FRAME_RELATED_P (insn) = 1;
20042 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20043 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20046 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20047 SAVED_REGS_MASK shows which registers need to be restored.
20049 Unfortunately, since this insn does not reflect very well the actual
20050 semantics of the operation, we need to annotate the insn for the benefit
20051 of DWARF2 frame unwind information. */
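/* As a hedged illustration: for SAVED_REGS_MASK covering {r4, r5, pc} the
   emitted jump insn corresponds to

     pop   {r4, r5, pc}          @ ldmfd sp!, {r4, r5, pc}

   with REG_CFA_RESTORE notes for r4 and r5 only (the PC is deliberately kept
   out of the DWARF info), while a pop that does not include the PC also gets
   a REG_CFA_ADJUST_CFA note for the stack adjustment.  */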
20052 static void
20053 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20055 int num_regs = 0;
20056 int i, j;
20057 rtx par;
20058 rtx dwarf = NULL_RTX;
20059 rtx tmp, reg;
20060 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20061 int offset_adj;
20062 int emit_update;
20064 offset_adj = return_in_pc ? 1 : 0;
20065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20066 if (saved_regs_mask & (1 << i))
20067 num_regs++;
20069 gcc_assert (num_regs && num_regs <= 16);
20071 /* If SP is in reglist, then we don't emit SP update insn. */
20072 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20074 /* The parallel needs to hold num_regs SETs
20075 and one SET for the stack update. */
20076 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20078 if (return_in_pc)
20079 XVECEXP (par, 0, 0) = ret_rtx;
20081 if (emit_update)
20083 /* Increment the stack pointer, based on there being
20084 num_regs 4-byte registers to restore. */
20085 tmp = gen_rtx_SET (stack_pointer_rtx,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 4 * num_regs));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (par, 0, offset_adj) = tmp;
20093 /* Now restore every reg, which may include PC. */
20094 for (j = 0, i = 0; j < num_regs; i++)
20095 if (saved_regs_mask & (1 << i))
20097 reg = gen_rtx_REG (SImode, i);
20098 if ((num_regs == 1) && emit_update && !return_in_pc)
20100 /* Emit single load with writeback. */
20101 tmp = gen_frame_mem (SImode,
20102 gen_rtx_POST_INC (Pmode,
20103 stack_pointer_rtx));
20104 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20105 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20106 return;
20109 tmp = gen_rtx_SET (reg,
20110 gen_frame_mem
20111 (SImode,
20112 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20113 RTX_FRAME_RELATED_P (tmp) = 1;
20114 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20116 /* We need to maintain a sequence for the DWARF info too. As the DWARF
20117 info should not include the PC, skip it. */
20118 if (i != PC_REGNUM)
20119 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20121 j++;
20124 if (return_in_pc)
20125 par = emit_jump_insn (par);
20126 else
20127 par = emit_insn (par);
20129 REG_NOTES (par) = dwarf;
20130 if (!return_in_pc)
20131 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20132 stack_pointer_rtx, stack_pointer_rtx);
20135 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20136 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20138 Unfortunately, since this insn does not reflect very well the actual
20139 semantics of the operation, we need to annotate the insn for the benefit
20140 of DWARF2 frame unwind information. */
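/* Illustratively (an assumption, not from the sources): popping four
   D-registers starting at d8, with BASE_REG being the stack pointer,
   corresponds to

     vldm  sp!, {d8-d11}

   i.e. one 32-byte base-register update plus four DFmode loads, each of
   which appears as its own SET in the parallel and gets a REG_CFA_RESTORE
   note.  */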
20141 static void
20142 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20144 int i, j;
20145 rtx par;
20146 rtx dwarf = NULL_RTX;
20147 rtx tmp, reg;
20149 gcc_assert (num_regs && num_regs <= 32);
20151 /* Work around the ARM10 VFPr1 bug. */
20152 if (num_regs == 2 && !arm_arch6)
20154 if (first_reg == 15)
20155 first_reg--;
20157 num_regs++;
20160 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20161 there could be up to 32 D-registers to restore.
20162 If there are more than 16 D-registers, make two recursive calls,
20163 each of which emits one pop_multi instruction. */
20164 if (num_regs > 16)
20166 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20167 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20168 return;
20171 /* The parallel needs to hold num_regs SETs
20172 and one SET for the stack update. */
20173 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20175 /* Increment the stack pointer, based on there being
20176 num_regs 8-byte registers to restore. */
20177 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20178 RTX_FRAME_RELATED_P (tmp) = 1;
20179 XVECEXP (par, 0, 0) = tmp;
20181 /* Now show every reg that will be restored, using a SET for each. */
20182 for (j = 0, i=first_reg; j < num_regs; i += 2)
20184 reg = gen_rtx_REG (DFmode, i);
20186 tmp = gen_rtx_SET (reg,
20187 gen_frame_mem
20188 (DFmode,
20189 plus_constant (Pmode, base_reg, 8 * j)));
20190 RTX_FRAME_RELATED_P (tmp) = 1;
20191 XVECEXP (par, 0, j + 1) = tmp;
20193 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20195 j++;
20198 par = emit_insn (par);
20199 REG_NOTES (par) = dwarf;
20201 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20202 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20204 RTX_FRAME_RELATED_P (par) = 1;
20205 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20207 else
20208 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20209 base_reg, base_reg);
20212 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20213 even number of registers is being popped, LDRD patterns are created for
20214 all register pairs. If an odd number of registers is popped, the last register
20215 is loaded using an LDR pattern. */
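/* A hypothetical example of the resulting epilogue for {r4, r5, r6}:

     ldrd  r4, r5, [sp]          @ pair loaded with plain offset addressing
     add   sp, sp, #8
     ldr   r6, [sp], #4          @ odd leftover: LDR with post-increment

   If the PC were in the mask it would be the final, specially handled
   load.  */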
20216 static void
20217 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20219 int num_regs = 0;
20220 int i, j;
20221 rtx par = NULL_RTX;
20222 rtx dwarf = NULL_RTX;
20223 rtx tmp, reg, tmp1;
20224 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20226 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20227 if (saved_regs_mask & (1 << i))
20228 num_regs++;
20230 gcc_assert (num_regs && num_regs <= 16);
20232 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20233 to be popped. So, if num_regs is even, now it will become odd,
20234 and we can generate pop with PC. If num_regs is odd, it will be
20235 even now, and ldr with return can be generated for PC. */
20236 if (return_in_pc)
20237 num_regs--;
20239 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20241 /* Var j iterates over all the registers to gather all the registers in
20242 saved_regs_mask. Var i gives index of saved registers in stack frame.
20243 A PARALLEL RTX of register-pair is created here, so that pattern for
20244 LDRD can be matched. As PC is always last register to be popped, and
20245 we have already decremented num_regs if PC, we don't have to worry
20246 about PC in this loop. */
20247 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20248 if (saved_regs_mask & (1 << j))
20250 /* Create RTX for memory load. */
20251 reg = gen_rtx_REG (SImode, j);
20252 tmp = gen_rtx_SET (reg,
20253 gen_frame_mem (SImode,
20254 plus_constant (Pmode,
20255 stack_pointer_rtx, 4 * i)));
20256 RTX_FRAME_RELATED_P (tmp) = 1;
20258 if (i % 2 == 0)
20260 /* When saved-register index (i) is even, the RTX to be emitted is
20261 yet to be created. Hence create it first. The LDRD pattern we
20262 are generating is :
20263 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20264 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20265 where target registers need not be consecutive. */
20266 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20267 dwarf = NULL_RTX;
20270 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20271 added as 0th element and if i is odd, reg_i is added as 1st element
20272 of LDRD pattern shown above. */
20273 XVECEXP (par, 0, (i % 2)) = tmp;
20274 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20276 if ((i % 2) == 1)
20278 /* When saved-register index (i) is odd, RTXs for both the registers
20279 to be loaded are generated in above given LDRD pattern, and the
20280 pattern can be emitted now. */
20281 par = emit_insn (par);
20282 REG_NOTES (par) = dwarf;
20283 RTX_FRAME_RELATED_P (par) = 1;
20286 i++;
20289 /* If an odd number of registers was pushed and return_in_pc is false, or an
20290 even number was pushed and return_in_pc is true, the last register is
20291 popped using LDR; it can be the PC as well. Hence, adjust the stack first
20292 and then load with post-increment. */
20294 /* Increment the stack pointer, based on there being
20295 num_regs 4-byte registers to restore. */
20296 tmp = gen_rtx_SET (stack_pointer_rtx,
20297 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 tmp = emit_insn (tmp);
20300 if (!return_in_pc)
20302 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20303 stack_pointer_rtx, stack_pointer_rtx);
20306 dwarf = NULL_RTX;
20308 if (((num_regs % 2) == 1 && !return_in_pc)
20309 || ((num_regs % 2) == 0 && return_in_pc))
20311 /* Scan for the single register to be popped. Skip until the saved
20312 register is found. */
20313 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20315 /* Gen LDR with post increment here. */
20316 tmp1 = gen_rtx_MEM (SImode,
20317 gen_rtx_POST_INC (SImode,
20318 stack_pointer_rtx));
20319 set_mem_alias_set (tmp1, get_frame_alias_set ());
20321 reg = gen_rtx_REG (SImode, j);
20322 tmp = gen_rtx_SET (reg, tmp1);
20323 RTX_FRAME_RELATED_P (tmp) = 1;
20324 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20326 if (return_in_pc)
20328 /* If return_in_pc, j must be PC_REGNUM. */
20329 gcc_assert (j == PC_REGNUM);
20330 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20331 XVECEXP (par, 0, 0) = ret_rtx;
20332 XVECEXP (par, 0, 1) = tmp;
20333 par = emit_jump_insn (par);
20335 else
20337 par = emit_insn (tmp);
20338 REG_NOTES (par) = dwarf;
20339 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20340 stack_pointer_rtx, stack_pointer_rtx);
20344 else if ((num_regs % 2) == 1 && return_in_pc)
20346 /* There are 2 registers to be popped. So, generate the pattern
20347 pop_multiple_with_stack_update_and_return to pop in PC. */
20348 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20351 return;
20354 /* LDRD in ARM mode needs consecutive registers as operands. This function
20355 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20356 offset addressing and then generates one separate stack update. This provides
20357 more scheduling freedom, compared to writeback on every load. However,
20358 if the function returns using load into PC directly
20359 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20360 before the last load. TODO: Add a peephole optimization to recognize
20361 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20362 peephole optimization to merge the load at stack-offset zero
20363 with the stack update instruction using load with writeback
20364 in post-index addressing mode. */
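/* Sketching the expected output (an illustration, not from the sources) for
   {r4, r5, r6} when the PC is not being popped:

     ldrd  r4, r5, [sp]          @ consecutive pair, offset addressing
     ldr   r6, [sp, #8]          @ leftover single-word load
     add   sp, sp, #12           @ one separate stack update at the end

   When the PC is in SAVED_REGS_MASK, the stack is updated first and the
   final load is an `ldr pc, [sp], #4' that performs the return.  */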
20365 static void
20366 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20368 int j = 0;
20369 int offset = 0;
20370 rtx par = NULL_RTX;
20371 rtx dwarf = NULL_RTX;
20372 rtx tmp, mem;
20374 /* Restore saved registers. */
20375 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20376 j = 0;
20377 while (j <= LAST_ARM_REGNUM)
20378 if (saved_regs_mask & (1 << j))
20380 if ((j % 2) == 0
20381 && (saved_regs_mask & (1 << (j + 1)))
20382 && (j + 1) != PC_REGNUM)
20384 /* Current register and next register form register pair for which
20385 LDRD can be generated. PC is always the last register popped, and
20386 we handle it separately. */
20387 if (offset > 0)
20388 mem = gen_frame_mem (DImode,
20389 plus_constant (Pmode,
20390 stack_pointer_rtx,
20391 offset));
20392 else
20393 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20395 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20396 tmp = emit_insn (tmp);
20397 RTX_FRAME_RELATED_P (tmp) = 1;
20399 /* Generate dwarf info. */
20401 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20402 gen_rtx_REG (SImode, j),
20403 NULL_RTX);
20404 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20405 gen_rtx_REG (SImode, j + 1),
20406 dwarf);
20408 REG_NOTES (tmp) = dwarf;
20410 offset += 8;
20411 j += 2;
20413 else if (j != PC_REGNUM)
20415 /* Emit a single word load. */
20416 if (offset > 0)
20417 mem = gen_frame_mem (SImode,
20418 plus_constant (Pmode,
20419 stack_pointer_rtx,
20420 offset));
20421 else
20422 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20424 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20425 tmp = emit_insn (tmp);
20426 RTX_FRAME_RELATED_P (tmp) = 1;
20428 /* Generate dwarf info. */
20429 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20430 gen_rtx_REG (SImode, j),
20431 NULL_RTX);
20433 offset += 4;
20434 j += 1;
20436 else /* j == PC_REGNUM */
20437 j++;
20439 else
20440 j++;
20442 /* Update the stack. */
20443 if (offset > 0)
20445 tmp = gen_rtx_SET (stack_pointer_rtx,
20446 plus_constant (Pmode,
20447 stack_pointer_rtx,
20448 offset));
20449 tmp = emit_insn (tmp);
20450 arm_add_cfa_adjust_cfa_note (tmp, offset,
20451 stack_pointer_rtx, stack_pointer_rtx);
20452 offset = 0;
20455 if (saved_regs_mask & (1 << PC_REGNUM))
20457 /* Only PC is to be popped. */
20458 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20459 XVECEXP (par, 0, 0) = ret_rtx;
20460 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20461 gen_frame_mem (SImode,
20462 gen_rtx_POST_INC (SImode,
20463 stack_pointer_rtx)));
20464 RTX_FRAME_RELATED_P (tmp) = 1;
20465 XVECEXP (par, 0, 1) = tmp;
20466 par = emit_jump_insn (par);
20468 /* Generate dwarf info. */
20469 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20470 gen_rtx_REG (SImode, PC_REGNUM),
20471 NULL_RTX);
20472 REG_NOTES (par) = dwarf;
20473 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20474 stack_pointer_rtx, stack_pointer_rtx);
20478 /* Calculate the size of the return value that is passed in registers. */
20479 static unsigned
20480 arm_size_return_regs (void)
20482 machine_mode mode;
20484 if (crtl->return_rtx != 0)
20485 mode = GET_MODE (crtl->return_rtx);
20486 else
20487 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20489 return GET_MODE_SIZE (mode);
20492 /* Return true if the current function needs to save/restore LR. */
20493 static bool
20494 thumb_force_lr_save (void)
20496 return !cfun->machine->lr_save_eliminated
20497 && (!leaf_function_p ()
20498 || thumb_far_jump_used_p ()
20499 || df_regs_ever_live_p (LR_REGNUM));
20502 /* We do not know if r3 will be available because
20503 we do have an indirect tailcall happening in this
20504 particular case. */
20505 static bool
20506 is_indirect_tailcall_p (rtx call)
20508 rtx pat = PATTERN (call);
20510 /* Indirect tail call. */
20511 pat = XVECEXP (pat, 0, 0);
20512 if (GET_CODE (pat) == SET)
20513 pat = SET_SRC (pat);
20515 pat = XEXP (XEXP (pat, 0), 0);
20516 return REG_P (pat);
20519 /* Return true if r3 is used by any of the tail call insns in the
20520 current function. */
20521 static bool
20522 any_sibcall_could_use_r3 (void)
20524 edge_iterator ei;
20525 edge e;
20527 if (!crtl->tail_call_emit)
20528 return false;
20529 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20530 if (e->flags & EDGE_SIBCALL)
20532 rtx call = BB_END (e->src);
20533 if (!CALL_P (call))
20534 call = prev_nonnote_nondebug_insn (call);
20535 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20536 if (find_regno_fusage (call, USE, 3)
20537 || is_indirect_tailcall_p (call))
20538 return true;
20540 return false;
20544 /* Compute the distance from register FROM to register TO.
20545 These can be the arg pointer (26), the soft frame pointer (25),
20546 the stack pointer (13) or the hard frame pointer (11).
20547 In Thumb mode r7 is used as the hard frame pointer, if needed.
20548 Typical stack layout looks like this:
20550 old stack pointer -> | |
20551 ----
20552 | | \
20553 | | saved arguments for
20554 | | vararg functions
20555 | | /
20557 hard FP & arg pointer -> | | \
20558 | | stack
20559 | | frame
20560 | | /
20562 | | \
20563 | | call saved
20564 | | registers
20565 soft frame pointer -> | | /
20567 | | \
20568 | | local
20569 | | variables
20570 locals base pointer -> | | /
20572 | | \
20573 | | outgoing
20574 | | arguments
20575 current stack pointer -> | | /
20578 For a given function some or all of these stack components
20579 may not be needed, giving rise to the possibility of
20580 eliminating some of the registers.
20582 The values returned by this function must reflect the behavior
20583 of arm_expand_prologue() and arm_compute_save_reg_mask().
20585 The sign of the number returned reflects the direction of stack
20586 growth, so the values are positive for all eliminations except
20587 from the soft frame pointer to the hard frame pointer.
20589 SFP may point just inside the local variables block to ensure correct
20590 alignment. */
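/* A worked example under assumed conditions (no pretend args, no static
   chain, no caller-interworking slot, {r4, r5, r6, lr} saved, 16 bytes of
   locals, 8 bytes of outgoing arguments, no extra doubleword padding):

     saved_args    = 0
     saved_regs    = 0 + 16  = 16
     soft_frame    = 16
     locals_base   = 16 + 16 = 32
     outgoing_args = 32 + 8  = 40

   giving an ARG_POINTER -> STACK_POINTER elimination offset of
   40 - (0 + 4) = 36 and a FRAME_POINTER -> STACK_POINTER offset of
   40 - 16 = 24.  */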
20593 /* Calculate stack offsets. These are used to calculate register elimination
20594 offsets and in prologue/epilogue code. Also calculates which registers
20595 should be saved. */
20597 static arm_stack_offsets *
20598 arm_get_frame_offsets (void)
20600 struct arm_stack_offsets *offsets;
20601 unsigned long func_type;
20602 int leaf;
20603 int saved;
20604 int core_saved;
20605 HOST_WIDE_INT frame_size;
20606 int i;
20608 offsets = &cfun->machine->stack_offsets;
20610 /* We need to know if we are a leaf function. Unfortunately, it
20611 is possible to be called after start_sequence has been called,
20612 which causes get_insns to return the insns for the sequence,
20613 not the function, which will cause leaf_function_p to return
20614 the incorrect result.
20616 Fortunately, we only need to know about leaf functions once reload has completed, and the
20617 frame size cannot be changed after that time, so we can safely
20618 use the cached value. */
20620 if (reload_completed)
20621 return offsets;
20623 /* Initially this is the size of the local variables. It will be translated
20624 into an offset once we have determined the size of preceding data. */
20625 frame_size = ROUND_UP_WORD (get_frame_size ());
20627 leaf = leaf_function_p ();
20629 /* Space for variadic functions. */
20630 offsets->saved_args = crtl->args.pretend_args_size;
20632 /* In Thumb mode this is incorrect, but never used. */
20633 offsets->frame
20634 = (offsets->saved_args
20635 + arm_compute_static_chain_stack_bytes ()
20636 + (frame_pointer_needed ? 4 : 0));
20638 if (TARGET_32BIT)
20640 unsigned int regno;
20642 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20643 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20644 saved = core_saved;
20646 /* We know that SP will be doubleword aligned on entry, and we must
20647 preserve that condition at any subroutine call. We also require the
20648 soft frame pointer to be doubleword aligned. */
20650 if (TARGET_REALLY_IWMMXT)
20652 /* Check for the call-saved iWMMXt registers. */
20653 for (regno = FIRST_IWMMXT_REGNUM;
20654 regno <= LAST_IWMMXT_REGNUM;
20655 regno++)
20656 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20657 saved += 8;
20660 func_type = arm_current_func_type ();
20661 /* Space for saved VFP registers. */
20662 if (! IS_VOLATILE (func_type)
20663 && TARGET_HARD_FLOAT && TARGET_VFP)
20664 saved += arm_get_vfp_saved_size ();
20666 else /* TARGET_THUMB1 */
20668 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20669 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20670 saved = core_saved;
20671 if (TARGET_BACKTRACE)
20672 saved += 16;
20675 /* Saved registers include the stack frame. */
20676 offsets->saved_regs
20677 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20678 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20680 /* A leaf function does not need any stack alignment if it has nothing
20681 on the stack. */
20682 if (leaf && frame_size == 0
20683 /* However if it calls alloca(), we have a dynamically allocated
20684 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20685 && ! cfun->calls_alloca)
20687 offsets->outgoing_args = offsets->soft_frame;
20688 offsets->locals_base = offsets->soft_frame;
20689 return offsets;
20692 /* Ensure SFP has the correct alignment. */
20693 if (ARM_DOUBLEWORD_ALIGN
20694 && (offsets->soft_frame & 7))
20696 offsets->soft_frame += 4;
20697 /* Try to align stack by pushing an extra reg. Don't bother doing this
20698 when there is a stack frame as the alignment will be rolled into
20699 the normal stack adjustment. */
20700 if (frame_size + crtl->outgoing_args_size == 0)
20702 int reg = -1;
20704 /* Register r3 is caller-saved. Normally it does not need to be
20705 saved on entry by the prologue. However if we choose to save
20706 it for padding then we may confuse the compiler into thinking
20707 a prologue sequence is required when in fact it is not. This
20708 will occur when shrink-wrapping if r3 is used as a scratch
20709 register and there are no other callee-saved writes.
20711 This situation can be avoided when other callee-saved registers
20712 are available and r3 is not mandatory if we choose a callee-saved
20713 register for padding. */
20714 bool prefer_callee_reg_p = false;
20716 /* If it is safe to use r3, then do so. This sometimes
20717 generates better code on Thumb-2 by avoiding the need to
20718 use 32-bit push/pop instructions. */
20719 if (! any_sibcall_could_use_r3 ()
20720 && arm_size_return_regs () <= 12
20721 && (offsets->saved_regs_mask & (1 << 3)) == 0
20722 && (TARGET_THUMB2
20723 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20725 reg = 3;
20726 if (!TARGET_THUMB2)
20727 prefer_callee_reg_p = true;
20729 if (reg == -1
20730 || prefer_callee_reg_p)
20732 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20734 /* Avoid fixed registers; they may be changed at
20735 arbitrary times so it's unsafe to restore them
20736 during the epilogue. */
20737 if (!fixed_regs[i]
20738 && (offsets->saved_regs_mask & (1 << i)) == 0)
20740 reg = i;
20741 break;
20746 if (reg != -1)
20748 offsets->saved_regs += 4;
20749 offsets->saved_regs_mask |= (1 << reg);
20754 offsets->locals_base = offsets->soft_frame + frame_size;
20755 offsets->outgoing_args = (offsets->locals_base
20756 + crtl->outgoing_args_size);
20758 if (ARM_DOUBLEWORD_ALIGN)
20760 /* Ensure SP remains doubleword aligned. */
20761 if (offsets->outgoing_args & 7)
20762 offsets->outgoing_args += 4;
20763 gcc_assert (!(offsets->outgoing_args & 7));
20766 return offsets;
20770 /* Calculate the relative offsets for the different stack pointers. Positive
20771 offsets are in the direction of stack growth. */
20773 HOST_WIDE_INT
20774 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20776 arm_stack_offsets *offsets;
20778 offsets = arm_get_frame_offsets ();
20780 /* OK, now we have enough information to compute the distances.
20781 There must be an entry in these switch tables for each pair
20782 of registers in ELIMINABLE_REGS, even if some of the entries
20783 seem to be redundant or useless. */
20784 switch (from)
20786 case ARG_POINTER_REGNUM:
20787 switch (to)
20789 case THUMB_HARD_FRAME_POINTER_REGNUM:
20790 return 0;
20792 case FRAME_POINTER_REGNUM:
20793 /* This is the reverse of the soft frame pointer
20794 to hard frame pointer elimination below. */
20795 return offsets->soft_frame - offsets->saved_args;
20797 case ARM_HARD_FRAME_POINTER_REGNUM:
20798 /* This is only non-zero in the case where the static chain register
20799 is stored above the frame. */
20800 return offsets->frame - offsets->saved_args - 4;
20802 case STACK_POINTER_REGNUM:
20803 /* If nothing has been pushed on the stack at all
20804 then this will return -4. This *is* correct! */
20805 return offsets->outgoing_args - (offsets->saved_args + 4);
20807 default:
20808 gcc_unreachable ();
20810 gcc_unreachable ();
20812 case FRAME_POINTER_REGNUM:
20813 switch (to)
20815 case THUMB_HARD_FRAME_POINTER_REGNUM:
20816 return 0;
20818 case ARM_HARD_FRAME_POINTER_REGNUM:
20819 /* The hard frame pointer points to the top entry in the
20820 stack frame. The soft frame pointer to the bottom entry
20821 in the stack frame. If there is no stack frame at all,
20822 then they are identical. */
20824 return offsets->frame - offsets->soft_frame;
20826 case STACK_POINTER_REGNUM:
20827 return offsets->outgoing_args - offsets->soft_frame;
20829 default:
20830 gcc_unreachable ();
20832 gcc_unreachable ();
20834 default:
20835 /* You cannot eliminate from the stack pointer.
20836 In theory you could eliminate from the hard frame
20837 pointer to the stack pointer, but this will never
20838 happen, since if a stack frame is not needed the
20839 hard frame pointer will never be used. */
20840 gcc_unreachable ();
20844 /* Given FROM and TO register numbers, say whether this elimination is
20845 allowed. Frame pointer elimination is automatically handled.
20847 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20848 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20849 pointer, we must eliminate FRAME_POINTER_REGNUM into
20850 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20851 ARG_POINTER_REGNUM. */
20853 bool
20854 arm_can_eliminate (const int from, const int to)
20856 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20857 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20858 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20859 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20860 true);
20863 /* Emit RTL to save coprocessor registers on function entry. Returns the
20864 number of bytes pushed. */
20866 static int
20867 arm_save_coproc_regs(void)
20869 int saved_size = 0;
20870 unsigned reg;
20871 unsigned start_reg;
20872 rtx insn;
20874 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20875 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20877 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20878 insn = gen_rtx_MEM (V2SImode, insn);
20879 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20880 RTX_FRAME_RELATED_P (insn) = 1;
20881 saved_size += 8;
20884 if (TARGET_HARD_FLOAT && TARGET_VFP)
20886 start_reg = FIRST_VFP_REGNUM;
20888 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20890 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20891 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20893 if (start_reg != reg)
20894 saved_size += vfp_emit_fstmd (start_reg,
20895 (reg - start_reg) / 2);
20896 start_reg = reg + 2;
20899 if (start_reg != reg)
20900 saved_size += vfp_emit_fstmd (start_reg,
20901 (reg - start_reg) / 2);
20903 return saved_size;
20907 /* Set the Thumb frame pointer from the stack pointer. */
20909 static void
20910 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20912 HOST_WIDE_INT amount;
20913 rtx insn, dwarf;
20915 amount = offsets->outgoing_args - offsets->locals_base;
20916 if (amount < 1024)
20917 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20918 stack_pointer_rtx, GEN_INT (amount)));
20919 else
20921 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20922 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20923 expects the first two operands to be the same. */
20924 if (TARGET_THUMB2)
20926 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20927 stack_pointer_rtx,
20928 hard_frame_pointer_rtx));
20930 else
20932 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20933 hard_frame_pointer_rtx,
20934 stack_pointer_rtx));
20936 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
20937 plus_constant (Pmode, stack_pointer_rtx, amount));
20938 RTX_FRAME_RELATED_P (dwarf) = 1;
20939 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20942 RTX_FRAME_RELATED_P (insn) = 1;
20945 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20946 function. */
20947 void
20948 arm_expand_prologue (void)
20950 rtx amount;
20951 rtx insn;
20952 rtx ip_rtx;
20953 unsigned long live_regs_mask;
20954 unsigned long func_type;
20955 int fp_offset = 0;
20956 int saved_pretend_args = 0;
20957 int saved_regs = 0;
20958 unsigned HOST_WIDE_INT args_to_push;
20959 arm_stack_offsets *offsets;
20961 func_type = arm_current_func_type ();
20963 /* Naked functions don't have prologues. */
20964 if (IS_NAKED (func_type))
20965 return;
20967 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20968 args_to_push = crtl->args.pretend_args_size;
20970 /* Compute which register we will have to save onto the stack. */
20971 offsets = arm_get_frame_offsets ();
20972 live_regs_mask = offsets->saved_regs_mask;
20974 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20976 if (IS_STACKALIGN (func_type))
20978 rtx r0, r1;
20980 /* Handle a word-aligned stack pointer. We generate the following:
20982 mov r0, sp
20983 bic r1, r0, #7
20984 mov sp, r1
20985 <save and restore r0 in normal prologue/epilogue>
20986 mov sp, r0
20987 bx lr
20989 The unwinder doesn't need to know about the stack realignment.
20990 Just tell it we saved SP in r0. */
20991 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20993 r0 = gen_rtx_REG (SImode, R0_REGNUM);
20994 r1 = gen_rtx_REG (SImode, R1_REGNUM);
20996 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20997 RTX_FRAME_RELATED_P (insn) = 1;
20998 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21000 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21002 /* ??? The CFA changes here, which may cause GDB to conclude that it
21003 has entered a different function. That said, the unwind info is
21004 correct, individually, before and after this instruction because
21005 we've described the save of SP, which will override the default
21006 handling of SP as restoring from the CFA. */
21007 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21010 /* For APCS frames, if the IP register is clobbered
21011 when creating the frame, save that register in a special
21012 way. */
21013 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21015 if (IS_INTERRUPT (func_type))
21017 /* Interrupt functions must not corrupt any registers.
21018 Creating a frame pointer however, corrupts the IP
21019 register, so we must push it first. */
21020 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21022 /* Do not set RTX_FRAME_RELATED_P on this insn.
21023 The dwarf stack unwinding code only wants to see one
21024 stack decrement per function, and this is not it. If
21025 this instruction is labeled as being part of the frame
21026 creation sequence then dwarf2out_frame_debug_expr will
21027 die when it encounters the assignment of IP to FP
21028 later on, since the use of SP here establishes SP as
21029 the CFA register and not IP.
21031 Anyway this instruction is not really part of the stack
21032 frame creation although it is part of the prologue. */
21034 else if (IS_NESTED (func_type))
21036 /* The static chain register is the same as the IP register
21037 used as a scratch register during stack frame creation.
21038 To get around this we need to find somewhere to store IP
21039 whilst the frame is being created. We try the following
21040 places in order:
21042 1. The last argument register r3 if it is available.
21043 2. A slot on the stack above the frame if there are no
21044 arguments to push onto the stack.
21045 3. Register r3 again, after pushing the argument registers
21046 onto the stack, if this is a varargs function.
21047 4. The last slot on the stack created for the arguments to
21048 push, if this isn't a varargs function.
21050 Note - we only need to tell the dwarf2 backend about the SP
21051 adjustment in the second variant; the static chain register
21052 doesn't need to be unwound, as it doesn't contain a value
21053 inherited from the caller. */
21055 if (!arm_r3_live_at_start_p ())
21056 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21057 else if (args_to_push == 0)
21059 rtx addr, dwarf;
21061 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21062 saved_regs += 4;
21064 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21065 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21066 fp_offset = 4;
21068 /* Just tell the dwarf backend that we adjusted SP. */
21069 dwarf = gen_rtx_SET (stack_pointer_rtx,
21070 plus_constant (Pmode, stack_pointer_rtx,
21071 -fp_offset));
21072 RTX_FRAME_RELATED_P (insn) = 1;
21073 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21075 else
21077 /* Store the args on the stack. */
21078 if (cfun->machine->uses_anonymous_args)
21080 insn
21081 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21082 (0xf0 >> (args_to_push / 4)) & 0xf);
21083 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21084 saved_pretend_args = 1;
21086 else
21088 rtx addr, dwarf;
21090 if (args_to_push == 4)
21091 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21092 else
21093 addr
21094 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21095 plus_constant (Pmode,
21096 stack_pointer_rtx,
21097 -args_to_push));
21099 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21101 /* Just tell the dwarf backend that we adjusted SP. */
21102 dwarf
21103 = gen_rtx_SET (stack_pointer_rtx,
21104 plus_constant (Pmode, stack_pointer_rtx,
21105 -args_to_push));
21106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21109 RTX_FRAME_RELATED_P (insn) = 1;
21110 fp_offset = args_to_push;
21111 args_to_push = 0;
21115 insn = emit_set_insn (ip_rtx,
21116 plus_constant (Pmode, stack_pointer_rtx,
21117 fp_offset));
21118 RTX_FRAME_RELATED_P (insn) = 1;
21121 if (args_to_push)
21123 /* Push the argument registers, or reserve space for them. */
21124 if (cfun->machine->uses_anonymous_args)
21125 insn = emit_multi_reg_push
21126 ((0xf0 >> (args_to_push / 4)) & 0xf,
21127 (0xf0 >> (args_to_push / 4)) & 0xf);
21128 else
21129 insn = emit_insn
21130 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21131 GEN_INT (- args_to_push)));
21132 RTX_FRAME_RELATED_P (insn) = 1;
21135 /* If this is an interrupt service routine, and the link register
21136 is going to be pushed, and we're not generating extra
21137 push of IP (needed when a frame is needed and the frame layout is APCS),
21138 subtracting four from LR now will mean that the function return
21139 can be done with a single instruction. */
21140 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21141 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21142 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21143 && TARGET_ARM)
21145 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21147 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21150 if (live_regs_mask)
21152 unsigned long dwarf_regs_mask = live_regs_mask;
21154 saved_regs += bit_count (live_regs_mask) * 4;
21155 if (optimize_size && !frame_pointer_needed
21156 && saved_regs == offsets->saved_regs - offsets->saved_args)
21158 /* If no coprocessor registers are being pushed and we don't have
21159 to worry about a frame pointer then push extra registers to
21160 create the stack frame. This is done in a way that does not
21161 alter the frame layout, so is independent of the epilogue. */
21162 int n;
21163 int frame;
21164 n = 0;
21165 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21166 n++;
21167 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21168 if (frame && n * 4 >= frame)
21170 n = frame / 4;
21171 live_regs_mask |= (1 << n) - 1;
21172 saved_regs += frame;
21176 if (TARGET_LDRD
21177 && current_tune->prefer_ldrd_strd
21178 && !optimize_function_for_size_p (cfun))
21180 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21181 if (TARGET_THUMB2)
21182 thumb2_emit_strd_push (live_regs_mask);
21183 else if (TARGET_ARM
21184 && !TARGET_APCS_FRAME
21185 && !IS_INTERRUPT (func_type))
21186 arm_emit_strd_push (live_regs_mask);
21187 else
21189 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21190 RTX_FRAME_RELATED_P (insn) = 1;
21193 else
21195 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21196 RTX_FRAME_RELATED_P (insn) = 1;
21200 if (! IS_VOLATILE (func_type))
21201 saved_regs += arm_save_coproc_regs ();
21203 if (frame_pointer_needed && TARGET_ARM)
21205 /* Create the new frame pointer. */
21206 if (TARGET_APCS_FRAME)
21208 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21209 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21210 RTX_FRAME_RELATED_P (insn) = 1;
21212 if (IS_NESTED (func_type))
21214 /* Recover the static chain register. */
21215 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21216 insn = gen_rtx_REG (SImode, 3);
21217 else
21219 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21220 insn = gen_frame_mem (SImode, insn);
21222 emit_set_insn (ip_rtx, insn);
21223 /* Add a USE to stop propagate_one_insn() from barfing. */
21224 emit_insn (gen_force_register_use (ip_rtx));
21227 else
21229 insn = GEN_INT (saved_regs - 4);
21230 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21231 stack_pointer_rtx, insn));
21232 RTX_FRAME_RELATED_P (insn) = 1;
21236 if (flag_stack_usage_info)
21237 current_function_static_stack_size
21238 = offsets->outgoing_args - offsets->saved_args;
21240 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21242 /* This add can produce multiple insns for a large constant, so we
21243 need to get tricky. */
21244 rtx_insn *last = get_last_insn ();
21246 amount = GEN_INT (offsets->saved_args + saved_regs
21247 - offsets->outgoing_args);
21249 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21250 amount));
21253 last = last ? NEXT_INSN (last) : get_insns ();
21254 RTX_FRAME_RELATED_P (last) = 1;
21256 while (last != insn);
21258 /* If the frame pointer is needed, emit a special barrier that
21259 will prevent the scheduler from moving stores to the frame
21260 before the stack adjustment. */
21261 if (frame_pointer_needed)
21262 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21263 hard_frame_pointer_rtx));
21267 if (frame_pointer_needed && TARGET_THUMB2)
21268 thumb_set_frame_pointer (offsets);
21270 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21272 unsigned long mask;
21274 mask = live_regs_mask;
21275 mask &= THUMB2_WORK_REGS;
21276 if (!IS_NESTED (func_type))
21277 mask |= (1 << IP_REGNUM);
21278 arm_load_pic_register (mask);
21281 /* If we are profiling, make sure no instructions are scheduled before
21282 the call to mcount. Similarly if the user has requested no
21283 scheduling in the prologue. Similarly if we want non-call exceptions
21284 using the EABI unwinder, to prevent faulting instructions from being
21285 swapped with a stack adjustment. */
21286 if (crtl->profile || !TARGET_SCHED_PROLOG
21287 || (arm_except_unwind_info (&global_options) == UI_TARGET
21288 && cfun->can_throw_non_call_exceptions))
21289 emit_insn (gen_blockage ());
21291 /* If the link register is being kept alive, with the return address in it,
21292 then make sure that it does not get reused by the ce2 pass. */
21293 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21294 cfun->machine->lr_save_eliminated = 1;
21297 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21298 static void
21299 arm_print_condition (FILE *stream)
21301 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21303 /* Branch conversion is not implemented for Thumb-2. */
21304 if (TARGET_THUMB)
21306 output_operand_lossage ("predicated Thumb instruction");
21307 return;
21309 if (current_insn_predicate != NULL)
21311 output_operand_lossage
21312 ("predicated instruction in conditional sequence");
21313 return;
21316 fputs (arm_condition_codes[arm_current_cc], stream);
21318 else if (current_insn_predicate)
21320 enum arm_cond_code code;
21322 if (TARGET_THUMB1)
21324 output_operand_lossage ("predicated Thumb instruction");
21325 return;
21328 code = get_arm_condition_code (current_insn_predicate);
21329 fputs (arm_condition_codes[code], stream);
21334 /* Globally reserved letters: acln
21335 Punctuation letters currently used: @_|?().!#
21336 Lower case letters currently used: bcdefhimpqtvwxyz
21337 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21338 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21340 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21342 If CODE is 'd', then the X is a condition operand and the instruction
21343 should only be executed if the condition is true.
21344 if CODE is 'D', then the X is a condition operand and the instruction
21345 should only be executed if the condition is false: however, if the mode
21346 of the comparison is CCFPEmode, then always execute the instruction -- we
21347 do this because in these circumstances !GE does not necessarily imply LT;
21348 in these cases the instruction pattern will take care to make sure that
21349 an instruction containing %d will follow, thereby undoing the effects of
21350 doing this instruction unconditionally.
21351 If CODE is 'N' then X is a floating point operand that must be negated
21352 before output.
21353 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21354 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
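/* For instance (a hedged illustration using a made-up operand numbering,
   not a specific pattern from arm.md): in an output template such as
   "add%?\t%0, %1, %2" the '?' prints the current condition code, if any;
   "%B2" applied to (const_int 5) prints -6; and "%L2" applied to
   (const_int 0x12345678) prints 22136, i.e. the low 16 bits 0x5678.  */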
21355 static void
21356 arm_print_operand (FILE *stream, rtx x, int code)
21358 switch (code)
21360 case '@':
21361 fputs (ASM_COMMENT_START, stream);
21362 return;
21364 case '_':
21365 fputs (user_label_prefix, stream);
21366 return;
21368 case '|':
21369 fputs (REGISTER_PREFIX, stream);
21370 return;
21372 case '?':
21373 arm_print_condition (stream);
21374 return;
21376 case '(':
21377 /* Nothing in unified syntax, otherwise the current condition code. */
21378 if (!TARGET_UNIFIED_ASM)
21379 arm_print_condition (stream);
21380 break;
21382 case ')':
21383 /* The current condition code in unified syntax, otherwise nothing. */
21384 if (TARGET_UNIFIED_ASM)
21385 arm_print_condition (stream);
21386 break;
21388 case '.':
21389 /* The current condition code for a condition code setting instruction.
21390 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21391 if (TARGET_UNIFIED_ASM)
21393 fputc('s', stream);
21394 arm_print_condition (stream);
21396 else
21398 arm_print_condition (stream);
21399 fputc('s', stream);
21401 return;
21403 case '!':
21404 /* If the instruction is conditionally executed then print
21405 the current condition code, otherwise print 's'. */
21406 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21407 if (current_insn_predicate)
21408 arm_print_condition (stream);
21409 else
21410 fputc('s', stream);
21411 break;
21413 /* %# is a "break" sequence. It doesn't output anything, but is used to
21414 separate e.g. operand numbers from following text, if that text consists
21415 of further digits which we don't want to be part of the operand
21416 number. */
21417 case '#':
21418 return;
21420 case 'N':
21422 REAL_VALUE_TYPE r;
21423 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21424 r = real_value_negate (&r);
21425 fprintf (stream, "%s", fp_const_from_val (&r));
21427 return;
21429 /* An integer or symbol address without a preceding # sign. */
21430 case 'c':
21431 switch (GET_CODE (x))
21433 case CONST_INT:
21434 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21435 break;
21437 case SYMBOL_REF:
21438 output_addr_const (stream, x);
21439 break;
21441 case CONST:
21442 if (GET_CODE (XEXP (x, 0)) == PLUS
21443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21445 output_addr_const (stream, x);
21446 break;
21448 /* Fall through. */
21450 default:
21451 output_operand_lossage ("Unsupported operand for code '%c'", code);
21453 return;
21455 /* An integer that we want to print in HEX. */
21456 case 'x':
21457 switch (GET_CODE (x))
21459 case CONST_INT:
21460 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21461 break;
21463 default:
21464 output_operand_lossage ("Unsupported operand for code '%c'", code);
21466 return;
21468 case 'B':
21469 if (CONST_INT_P (x))
21471 HOST_WIDE_INT val;
21472 val = ARM_SIGN_EXTEND (~INTVAL (x));
21473 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21475 else
21477 putc ('~', stream);
21478 output_addr_const (stream, x);
21480 return;
21482 case 'b':
21483 /* Print the log2 of a CONST_INT. */
21485 HOST_WIDE_INT val;
21487 if (!CONST_INT_P (x)
21488 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21489 output_operand_lossage ("Unsupported operand for code '%c'", code);
21490 else
21491 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21493 return;
21495 case 'L':
21496 /* The low 16 bits of an immediate constant. */
21497 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21498 return;
21500 case 'i':
21501 fprintf (stream, "%s", arithmetic_instr (x, 1));
21502 return;
21504 case 'I':
21505 fprintf (stream, "%s", arithmetic_instr (x, 0));
21506 return;
21508 case 'S':
21510 HOST_WIDE_INT val;
21511 const char *shift;
21513 shift = shift_op (x, &val);
21515 if (shift)
21517 fprintf (stream, ", %s ", shift);
21518 if (val == -1)
21519 arm_print_operand (stream, XEXP (x, 1), 0);
21520 else
21521 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21524 return;
21526 /* An explanation of the 'Q', 'R' and 'H' register operands:
21528 In a pair of registers containing a DI or DF value the 'Q'
21529 operand returns the register number of the register containing
21530 the least significant part of the value. The 'R' operand returns
21531 the register number of the register containing the most
21532 significant part of the value.
21534 The 'H' operand returns the higher of the two register numbers.
21535 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21536 same as the 'Q' operand, since the most significant part of the
21537 value is held in the lower number register. The reverse is true
21538 on systems where WORDS_BIG_ENDIAN is false.
21540 The purpose of these operands is to distinguish between cases
21541 where the endian-ness of the values is important (for example
21542 when they are added together), and cases where the endian-ness
21543 is irrelevant, but the order of register operations is important.
21544 For example when loading a value from memory into a register
21545 pair, the endian-ness does not matter. Provided that the value
21546 from the lower memory address is put into the lower numbered
21547 register, and the value from the higher address is put into the
21548 higher numbered register, the load will work regardless of whether
21549 the value being loaded is big-wordian or little-wordian. The
21550 order of the two register loads can matter however, if the address
21551 of the memory location is actually held in one of the registers
21552 being overwritten by the load.
21554 The 'Q' and 'R' constraints are also available for 64-bit
21555 constants. */
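/* Worked example (illustrative): for a DImode value held in r0/r1 on a
   little-endian target (WORDS_BIG_ENDIAN false), %Q prints r0 (the least
   significant word), %R prints r1 (the most significant word) and %H
   prints r1 (the higher register number).  When WORDS_BIG_ENDIAN is true
   %Q and %R swap, while %H still prints r1.  */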
21556 case 'Q':
21557 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21559 rtx part = gen_lowpart (SImode, x);
21560 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21561 return;
21564 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21566 output_operand_lossage ("invalid operand for code '%c'", code);
21567 return;
21570 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21571 return;
21573 case 'R':
21574 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21576 machine_mode mode = GET_MODE (x);
21577 rtx part;
21579 if (mode == VOIDmode)
21580 mode = DImode;
21581 part = gen_highpart_mode (SImode, mode, x);
21582 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21583 return;
21586 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21588 output_operand_lossage ("invalid operand for code '%c'", code);
21589 return;
21592 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21593 return;
21595 case 'H':
21596 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21598 output_operand_lossage ("invalid operand for code '%c'", code);
21599 return;
21602 asm_fprintf (stream, "%r", REGNO (x) + 1);
21603 return;
21605 case 'J':
21606 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21608 output_operand_lossage ("invalid operand for code '%c'", code);
21609 return;
21612 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21613 return;
21615 case 'K':
21616 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21618 output_operand_lossage ("invalid operand for code '%c'", code);
21619 return;
21622 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21623 return;
21625 case 'm':
21626 asm_fprintf (stream, "%r",
21627 REG_P (XEXP (x, 0))
21628 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21629 return;
21631 case 'M':
21632 asm_fprintf (stream, "{%r-%r}",
21633 REGNO (x),
21634 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21635 return;
21637 /* Like 'M', but writing doubleword vector registers, for use by Neon
21638 insns. */
21639 case 'h':
21641 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21642 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21643 if (numregs == 1)
21644 asm_fprintf (stream, "{d%d}", regno);
21645 else
21646 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21648 return;
21650 case 'd':
21651 /* CONST_TRUE_RTX means always -- that's the default. */
21652 if (x == const_true_rtx)
21653 return;
21655 if (!COMPARISON_P (x))
21657 output_operand_lossage ("invalid operand for code '%c'", code);
21658 return;
21661 fputs (arm_condition_codes[get_arm_condition_code (x)],
21662 stream);
21663 return;
21665 case 'D':
21666 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21667 want to do that. */
21668 if (x == const_true_rtx)
21670 output_operand_lossage ("instruction never executed");
21671 return;
21673 if (!COMPARISON_P (x))
21675 output_operand_lossage ("invalid operand for code '%c'", code);
21676 return;
21679 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21680 (get_arm_condition_code (x))],
21681 stream);
21682 return;
21684 case 's':
21685 case 'V':
21686 case 'W':
21687 case 'X':
21688 case 'Y':
21689 case 'Z':
21690 /* Former Maverick support, removed after GCC-4.7. */
21691 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21692 return;
21694 case 'U':
21695 if (!REG_P (x)
21696 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21697 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21698 /* Bad value for wCG register number. */
21700 output_operand_lossage ("invalid operand for code '%c'", code);
21701 return;
21704 else
21705 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21706 return;
21708 /* Print an iWMMXt control register name. */
21709 case 'w':
21710 if (!CONST_INT_P (x)
21711 || INTVAL (x) < 0
21712 || INTVAL (x) >= 16)
21713 /* Bad value for wC register number. */
21715 output_operand_lossage ("invalid operand for code '%c'", code);
21716 return;
21719 else
21721 static const char * wc_reg_names [16] =
21723 "wCID", "wCon", "wCSSF", "wCASF",
21724 "wC4", "wC5", "wC6", "wC7",
21725 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21726 "wC12", "wC13", "wC14", "wC15"
21729 fputs (wc_reg_names [INTVAL (x)], stream);
21731 return;
21733 /* Print the high single-precision register of a VFP double-precision
21734 register. */
21735 case 'p':
21737 machine_mode mode = GET_MODE (x);
21738 int regno;
21740 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21742 output_operand_lossage ("invalid operand for code '%c'", code);
21743 return;
21746 regno = REGNO (x);
21747 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21749 output_operand_lossage ("invalid operand for code '%c'", code);
21750 return;
21753 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21755 return;
21757 /* Print a VFP/Neon double precision or quad precision register name. */
21758 case 'P':
21759 case 'q':
21761 machine_mode mode = GET_MODE (x);
21762 int is_quad = (code == 'q');
21763 int regno;
21765 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21767 output_operand_lossage ("invalid operand for code '%c'", code);
21768 return;
21771 if (!REG_P (x)
21772 || !IS_VFP_REGNUM (REGNO (x)))
21774 output_operand_lossage ("invalid operand for code '%c'", code);
21775 return;
21778 regno = REGNO (x);
21779 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21780 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21782 output_operand_lossage ("invalid operand for code '%c'", code);
21783 return;
21786 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21787 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21789 return;
21791 /* These two codes print the low/high doubleword register of a Neon quad
21792 register, respectively. For pair-structure types, can also print
21793 low/high quadword registers. */
21794 case 'e':
21795 case 'f':
21797 machine_mode mode = GET_MODE (x);
21798 int regno;
21800 if ((GET_MODE_SIZE (mode) != 16
21801 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21803 output_operand_lossage ("invalid operand for code '%c'", code);
21804 return;
21807 regno = REGNO (x);
21808 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21814 if (GET_MODE_SIZE (mode) == 16)
21815 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21816 + (code == 'f' ? 1 : 0));
21817 else
21818 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21819 + (code == 'f' ? 1 : 0));
21821 return;
21823 /* Print a VFPv3 floating-point constant, represented as an integer
21824 index. */
21825 case 'G':
21827 int index = vfp3_const_double_index (x);
21828 gcc_assert (index != -1);
21829 fprintf (stream, "%d", index);
21831 return;
21833 /* Print bits representing opcode features for Neon.
21835 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21836 and polynomials as unsigned.
21838 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21840 Bit 2 is 1 for rounding functions, 0 otherwise. */
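/* For example (illustrative): a bits value of 5 -- signed, ordinary
   integer, rounding -- makes %T print 's', %F print 'i', %t print 's'
   and %O print 'r', so a pattern can compose a mnemonic such as
   "vrhadd.s32" from these pieces.  */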
21842 /* Identify the type as 's', 'u', 'p' or 'f'. */
21843 case 'T':
21845 HOST_WIDE_INT bits = INTVAL (x);
21846 fputc ("uspf"[bits & 3], stream);
21848 return;
21850 /* Likewise, but signed and unsigned integers are both 'i'. */
21851 case 'F':
21853 HOST_WIDE_INT bits = INTVAL (x);
21854 fputc ("iipf"[bits & 3], stream);
21856 return;
21858 /* As for 'T', but emit 'u' instead of 'p'. */
21859 case 't':
21861 HOST_WIDE_INT bits = INTVAL (x);
21862 fputc ("usuf"[bits & 3], stream);
21864 return;
21866 /* Bit 2: rounding (vs none). */
21867 case 'O':
21869 HOST_WIDE_INT bits = INTVAL (x);
21870 fputs ((bits & 4) != 0 ? "r" : "", stream);
21872 return;
21874 /* Memory operand for vld1/vst1 instruction. */
21875 case 'A':
21877 rtx addr;
21878 bool postinc = FALSE;
21879 rtx postinc_reg = NULL;
21880 unsigned align, memsize, align_bits;
21882 gcc_assert (MEM_P (x));
21883 addr = XEXP (x, 0);
21884 if (GET_CODE (addr) == POST_INC)
21886 postinc = 1;
21887 addr = XEXP (addr, 0);
21889 if (GET_CODE (addr) == POST_MODIFY)
21891 postinc_reg = XEXP( XEXP (addr, 1), 1);
21892 addr = XEXP (addr, 0);
21894 asm_fprintf (stream, "[%r", REGNO (addr));
21896 /* We know the alignment of this access, so we can emit a hint in the
21897 instruction (for some alignments) as an aid to the memory subsystem
21898 of the target. */
21899 align = MEM_ALIGN (x) >> 3;
21900 memsize = MEM_SIZE (x);
21902 /* Only certain alignment specifiers are supported by the hardware. */
21903 if (memsize == 32 && (align % 32) == 0)
21904 align_bits = 256;
21905 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21906 align_bits = 128;
21907 else if (memsize >= 8 && (align % 8) == 0)
21908 align_bits = 64;
21909 else
21910 align_bits = 0;
21912 if (align_bits != 0)
21913 asm_fprintf (stream, ":%d", align_bits);
21915 asm_fprintf (stream, "]");
21917 if (postinc)
21918 fputs("!", stream);
21919 if (postinc_reg)
21920 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21922 return;
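/* Illustrative output for the 'A' code above: a 16-byte vld1/vst1 memory
   operand whose base address is in r0 and whose known alignment is 16
   bytes prints as "[r0:128]", with "!" appended for a post-increment by
   the access size.  This is a sketch of the formatting, not verified
   assembler output.  */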
21924 case 'C':
21926 rtx addr;
21928 gcc_assert (MEM_P (x));
21929 addr = XEXP (x, 0);
21930 gcc_assert (REG_P (addr));
21931 asm_fprintf (stream, "[%r]", REGNO (addr));
21933 return;
21935 /* Translate an S register number into a D register number and element index. */
21936 case 'y':
21938 machine_mode mode = GET_MODE (x);
21939 int regno;
21941 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21943 output_operand_lossage ("invalid operand for code '%c'", code);
21944 return;
21947 regno = REGNO (x);
21948 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21950 output_operand_lossage ("invalid operand for code '%c'", code);
21951 return;
21954 regno = regno - FIRST_VFP_REGNUM;
21955 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21957 return;
21959 case 'v':
21960 gcc_assert (CONST_DOUBLE_P (x));
21961 int result;
21962 result = vfp3_const_double_for_fract_bits (x);
21963 if (result == 0)
21964 result = vfp3_const_double_for_bits (x);
21965 fprintf (stream, "#%d", result);
21966 return;
21968 /* Register specifier for vld1.16/vst1.16. Translate the S register
21969 number into a D register number and element index. */
21970 case 'z':
21972 machine_mode mode = GET_MODE (x);
21973 int regno;
21975 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21977 output_operand_lossage ("invalid operand for code '%c'", code);
21978 return;
21981 regno = REGNO (x);
21982 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21984 output_operand_lossage ("invalid operand for code '%c'", code);
21985 return;
21988 regno = regno - FIRST_VFP_REGNUM;
21989 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21991 return;
21993 default:
21994 if (x == 0)
21996 output_operand_lossage ("missing operand");
21997 return;
22000 switch (GET_CODE (x))
22002 case REG:
22003 asm_fprintf (stream, "%r", REGNO (x));
22004 break;
22006 case MEM:
22007 output_memory_reference_mode = GET_MODE (x);
22008 output_address (XEXP (x, 0));
22009 break;
22011 case CONST_DOUBLE:
22013 char fpstr[20];
22014 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22015 sizeof (fpstr), 0, 1);
22016 fprintf (stream, "#%s", fpstr);
22018 break;
22020 default:
22021 gcc_assert (GET_CODE (x) != NEG);
22022 fputc ('#', stream);
22023 if (GET_CODE (x) == HIGH)
22025 fputs (":lower16:", stream);
22026 x = XEXP (x, 0);
22029 output_addr_const (stream, x);
22030 break;
22035 /* Target hook for printing a memory address. */
22036 static void
22037 arm_print_operand_address (FILE *stream, rtx x)
22039 if (TARGET_32BIT)
22041 int is_minus = GET_CODE (x) == MINUS;
22043 if (REG_P (x))
22044 asm_fprintf (stream, "[%r]", REGNO (x));
22045 else if (GET_CODE (x) == PLUS || is_minus)
22047 rtx base = XEXP (x, 0);
22048 rtx index = XEXP (x, 1);
22049 HOST_WIDE_INT offset = 0;
22050 if (!REG_P (base)
22051 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22053 /* Ensure that BASE is a register. */
22054 /* (one of them must be). */
22055 /* Also ensure the SP is not used as an index register. */
22056 std::swap (base, index);
22058 switch (GET_CODE (index))
22060 case CONST_INT:
22061 offset = INTVAL (index);
22062 if (is_minus)
22063 offset = -offset;
22064 asm_fprintf (stream, "[%r, #%wd]",
22065 REGNO (base), offset);
22066 break;
22068 case REG:
22069 asm_fprintf (stream, "[%r, %s%r]",
22070 REGNO (base), is_minus ? "-" : "",
22071 REGNO (index));
22072 break;
22074 case MULT:
22075 case ASHIFTRT:
22076 case LSHIFTRT:
22077 case ASHIFT:
22078 case ROTATERT:
22080 asm_fprintf (stream, "[%r, %s%r",
22081 REGNO (base), is_minus ? "-" : "",
22082 REGNO (XEXP (index, 0)));
22083 arm_print_operand (stream, index, 'S');
22084 fputs ("]", stream);
22085 break;
22088 default:
22089 gcc_unreachable ();
22092 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22093 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22095 extern machine_mode output_memory_reference_mode;
22097 gcc_assert (REG_P (XEXP (x, 0)));
22099 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22100 asm_fprintf (stream, "[%r, #%s%d]!",
22101 REGNO (XEXP (x, 0)),
22102 GET_CODE (x) == PRE_DEC ? "-" : "",
22103 GET_MODE_SIZE (output_memory_reference_mode));
22104 else
22105 asm_fprintf (stream, "[%r], #%s%d",
22106 REGNO (XEXP (x, 0)),
22107 GET_CODE (x) == POST_DEC ? "-" : "",
22108 GET_MODE_SIZE (output_memory_reference_mode));
22110 else if (GET_CODE (x) == PRE_MODIFY)
22112 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22113 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22114 asm_fprintf (stream, "#%wd]!",
22115 INTVAL (XEXP (XEXP (x, 1), 1)));
22116 else
22117 asm_fprintf (stream, "%r]!",
22118 REGNO (XEXP (XEXP (x, 1), 1)));
22120 else if (GET_CODE (x) == POST_MODIFY)
22122 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22123 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22124 asm_fprintf (stream, "#%wd",
22125 INTVAL (XEXP (XEXP (x, 1), 1)));
22126 else
22127 asm_fprintf (stream, "%r",
22128 REGNO (XEXP (XEXP (x, 1), 1)));
22130 else output_addr_const (stream, x);
22132 else
22134 if (REG_P (x))
22135 asm_fprintf (stream, "[%r]", REGNO (x));
22136 else if (GET_CODE (x) == POST_INC)
22137 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22138 else if (GET_CODE (x) == PLUS)
22140 gcc_assert (REG_P (XEXP (x, 0)));
22141 if (CONST_INT_P (XEXP (x, 1)))
22142 asm_fprintf (stream, "[%r, #%wd]",
22143 REGNO (XEXP (x, 0)),
22144 INTVAL (XEXP (x, 1)));
22145 else
22146 asm_fprintf (stream, "[%r, %r]",
22147 REGNO (XEXP (x, 0)),
22148 REGNO (XEXP (x, 1)));
22150 else
22151 output_addr_const (stream, x);
22155 /* Target hook for indicating whether a punctuation character for
22156 TARGET_PRINT_OPERAND is valid. */
22157 static bool
22158 arm_print_operand_punct_valid_p (unsigned char code)
22160 return (code == '@' || code == '|' || code == '.'
22161 || code == '(' || code == ')' || code == '#'
22162 || (TARGET_32BIT && (code == '?'))
22163 || (TARGET_THUMB2 && (code == '!'))
22164 || (TARGET_THUMB && (code == '_')));
22167 /* Target hook for assembling integer objects. The ARM version needs to
22168 handle word-sized values specially. */
22169 static bool
22170 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22172 machine_mode mode;
22174 if (size == UNITS_PER_WORD && aligned_p)
22176 fputs ("\t.word\t", asm_out_file);
22177 output_addr_const (asm_out_file, x);
22179 /* Mark symbols as position independent. We only do this in the
22180 .text segment, not in the .data segment. */
22181 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22182 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22184 /* See legitimize_pic_address for an explanation of the
22185 TARGET_VXWORKS_RTP check. */
22186 if (!arm_pic_data_is_text_relative
22187 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22188 fputs ("(GOT)", asm_out_file);
22189 else
22190 fputs ("(GOTOFF)", asm_out_file);
22192 fputc ('\n', asm_out_file);
22193 return true;
22196 mode = GET_MODE (x);
22198 if (arm_vector_mode_supported_p (mode))
22200 int i, units;
22202 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22204 units = CONST_VECTOR_NUNITS (x);
22205 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22207 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22208 for (i = 0; i < units; i++)
22210 rtx elt = CONST_VECTOR_ELT (x, i);
22211 assemble_integer
22212 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22214 else
22215 for (i = 0; i < units; i++)
22217 rtx elt = CONST_VECTOR_ELT (x, i);
22218 REAL_VALUE_TYPE rval;
22220 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22222 assemble_real
22223 (rval, GET_MODE_INNER (mode),
22224 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22227 return true;
22230 return default_assemble_integer (x, size, aligned_p);
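/* Illustrative output from the word-sized case above (assuming
   NEED_GOT_RELOC and -fpic): a constant-pool reference to a non-local
   symbol "foo" is emitted as "\t.word\tfoo(GOT)", while a local,
   text-relative reference is emitted as "\t.word\tfoo(GOTOFF)".  */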
22233 static void
22234 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22236 section *s;
22238 if (!TARGET_AAPCS_BASED)
22240 (is_ctor ?
22241 default_named_section_asm_out_constructor
22242 : default_named_section_asm_out_destructor) (symbol, priority);
22243 return;
22246 /* Put these in the .init_array section, using a special relocation. */
22247 if (priority != DEFAULT_INIT_PRIORITY)
22249 char buf[18];
22250 sprintf (buf, "%s.%.5u",
22251 is_ctor ? ".init_array" : ".fini_array",
22252 priority);
22253 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22255 else if (is_ctor)
22256 s = ctors_section;
22257 else
22258 s = dtors_section;
22260 switch_to_section (s);
22261 assemble_align (POINTER_SIZE);
22262 fputs ("\t.word\t", asm_out_file);
22263 output_addr_const (asm_out_file, symbol);
22264 fputs ("(target1)\n", asm_out_file);
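/* Illustrative example for the AAPCS path above: a constructor with
   priority 101 is placed in a section named ".init_array.00101" and is
   emitted as "\t.word\t<symbol>(target1)", letting the linker resolve
   the entry with the R_ARM_TARGET1 relocation.  */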
22267 /* Add a function to the list of static constructors. */
22269 static void
22270 arm_elf_asm_constructor (rtx symbol, int priority)
22272 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22275 /* Add a function to the list of static destructors. */
22277 static void
22278 arm_elf_asm_destructor (rtx symbol, int priority)
22280 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22283 /* A finite state machine takes care of noticing whether or not instructions
22284 can be conditionally executed, and thus decrease execution time and code
22285 size by deleting branch instructions. The fsm is controlled by
22286 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22288 /* The states of the fsm controlling condition codes are:
22289 0: normal, do nothing special
22290 1: make ASM_OUTPUT_OPCODE not output this instruction
22291 2: make ASM_OUTPUT_OPCODE not output this instruction
22292 3: make instructions conditional
22293 4: make instructions conditional
22295 State transitions (state->state by whom under condition):
22296 0 -> 1 final_prescan_insn if the `target' is a label
22297 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22298 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22299 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22300 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22301 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22302 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22303 (the target insn is arm_target_insn).
22305 If the jump clobbers the conditions then we use states 2 and 4.
22307 A similar thing can be done with conditional return insns.
22309 XXX In case the `target' is an unconditional branch, this conditionalising
22310 of the instructions always reduces code size, but not always execution
22311 time. But then, I want to reduce the code size to somewhere near what
22312 /bin/cc produces. */
22314 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22315 instructions. When a COND_EXEC instruction is seen the subsequent
22316 instructions are scanned so that multiple conditional instructions can be
22317 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22318 specify the length and true/false mask for the IT block. These will be
22319 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
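/* Worked example of the fsm (illustrative):

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   final_prescan_insn sees that the conditional branch targets a nearby
   label (state 0 -> 1), ASM_OUTPUT_OPCODE then suppresses the branch
   (1 -> 3), the ADD is printed with the inverse condition as "addne",
   and reaching .L1 returns the fsm to state 0.  */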
22321 /* Returns the index of the ARM condition code string in
22322 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22323 COMPARISON should be an rtx like `(eq (...) (...))'. */
22325 enum arm_cond_code
22326 maybe_get_arm_condition_code (rtx comparison)
22328 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22329 enum arm_cond_code code;
22330 enum rtx_code comp_code = GET_CODE (comparison);
22332 if (GET_MODE_CLASS (mode) != MODE_CC)
22333 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22334 XEXP (comparison, 1));
22336 switch (mode)
22338 case CC_DNEmode: code = ARM_NE; goto dominance;
22339 case CC_DEQmode: code = ARM_EQ; goto dominance;
22340 case CC_DGEmode: code = ARM_GE; goto dominance;
22341 case CC_DGTmode: code = ARM_GT; goto dominance;
22342 case CC_DLEmode: code = ARM_LE; goto dominance;
22343 case CC_DLTmode: code = ARM_LT; goto dominance;
22344 case CC_DGEUmode: code = ARM_CS; goto dominance;
22345 case CC_DGTUmode: code = ARM_HI; goto dominance;
22346 case CC_DLEUmode: code = ARM_LS; goto dominance;
22347 case CC_DLTUmode: code = ARM_CC;
22349 dominance:
22350 if (comp_code == EQ)
22351 return ARM_INVERSE_CONDITION_CODE (code);
22352 if (comp_code == NE)
22353 return code;
22354 return ARM_NV;
22356 case CC_NOOVmode:
22357 switch (comp_code)
22359 case NE: return ARM_NE;
22360 case EQ: return ARM_EQ;
22361 case GE: return ARM_PL;
22362 case LT: return ARM_MI;
22363 default: return ARM_NV;
22366 case CC_Zmode:
22367 switch (comp_code)
22369 case NE: return ARM_NE;
22370 case EQ: return ARM_EQ;
22371 default: return ARM_NV;
22374 case CC_Nmode:
22375 switch (comp_code)
22377 case NE: return ARM_MI;
22378 case EQ: return ARM_PL;
22379 default: return ARM_NV;
22382 case CCFPEmode:
22383 case CCFPmode:
22384 /* We can handle all cases except UNEQ and LTGT. */
22385 switch (comp_code)
22387 case GE: return ARM_GE;
22388 case GT: return ARM_GT;
22389 case LE: return ARM_LS;
22390 case LT: return ARM_MI;
22391 case NE: return ARM_NE;
22392 case EQ: return ARM_EQ;
22393 case ORDERED: return ARM_VC;
22394 case UNORDERED: return ARM_VS;
22395 case UNLT: return ARM_LT;
22396 case UNLE: return ARM_LE;
22397 case UNGT: return ARM_HI;
22398 case UNGE: return ARM_PL;
22399 /* UNEQ and LTGT do not have a representation. */
22400 case UNEQ: /* Fall through. */
22401 case LTGT: /* Fall through. */
22402 default: return ARM_NV;
22405 case CC_SWPmode:
22406 switch (comp_code)
22408 case NE: return ARM_NE;
22409 case EQ: return ARM_EQ;
22410 case GE: return ARM_LE;
22411 case GT: return ARM_LT;
22412 case LE: return ARM_GE;
22413 case LT: return ARM_GT;
22414 case GEU: return ARM_LS;
22415 case GTU: return ARM_CC;
22416 case LEU: return ARM_CS;
22417 case LTU: return ARM_HI;
22418 default: return ARM_NV;
22421 case CC_Cmode:
22422 switch (comp_code)
22424 case LTU: return ARM_CS;
22425 case GEU: return ARM_CC;
22426 default: return ARM_NV;
22429 case CC_CZmode:
22430 switch (comp_code)
22432 case NE: return ARM_NE;
22433 case EQ: return ARM_EQ;
22434 case GEU: return ARM_CS;
22435 case GTU: return ARM_HI;
22436 case LEU: return ARM_LS;
22437 case LTU: return ARM_CC;
22438 default: return ARM_NV;
22441 case CC_NCVmode:
22442 switch (comp_code)
22444 case GE: return ARM_GE;
22445 case LT: return ARM_LT;
22446 case GEU: return ARM_CS;
22447 case LTU: return ARM_CC;
22448 default: return ARM_NV;
22451 case CCmode:
22452 switch (comp_code)
22454 case NE: return ARM_NE;
22455 case EQ: return ARM_EQ;
22456 case GE: return ARM_GE;
22457 case GT: return ARM_GT;
22458 case LE: return ARM_LE;
22459 case LT: return ARM_LT;
22460 case GEU: return ARM_CS;
22461 case GTU: return ARM_HI;
22462 case LEU: return ARM_LS;
22463 case LTU: return ARM_CC;
22464 default: return ARM_NV;
22467 default: gcc_unreachable ();
22471 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22472 static enum arm_cond_code
22473 get_arm_condition_code (rtx comparison)
22475 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22476 gcc_assert (code != ARM_NV);
22477 return code;
22480 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22481 instructions. */
22482 void
22483 thumb2_final_prescan_insn (rtx_insn *insn)
22485 rtx_insn *first_insn = insn;
22486 rtx body = PATTERN (insn);
22487 rtx predicate;
22488 enum arm_cond_code code;
22489 int n;
22490 int mask;
22491 int max;
22493 /* max_insns_skipped in the tune was already taken into account in the
22494 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22495 just emit the IT blocks as best we can. It does not make sense to split
22496 the IT blocks. */
22497 max = MAX_INSN_PER_IT_BLOCK;
22499 /* Remove the previous insn from the count of insns to be output. */
22500 if (arm_condexec_count)
22501 arm_condexec_count--;
22503 /* Nothing to do if we are already inside a conditional block. */
22504 if (arm_condexec_count)
22505 return;
22507 if (GET_CODE (body) != COND_EXEC)
22508 return;
22510 /* Conditional jumps are implemented directly. */
22511 if (JUMP_P (insn))
22512 return;
22514 predicate = COND_EXEC_TEST (body);
22515 arm_current_cc = get_arm_condition_code (predicate);
22517 n = get_attr_ce_count (insn);
22518 arm_condexec_count = 1;
22519 arm_condexec_mask = (1 << n) - 1;
22520 arm_condexec_masklen = n;
22521 /* See if subsequent instructions can be combined into the same block. */
22522 for (;;)
22524 insn = next_nonnote_insn (insn);
22526 /* Jumping into the middle of an IT block is illegal, so a label or
22527 barrier terminates the block. */
22528 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22529 break;
22531 body = PATTERN (insn);
22532 /* USE and CLOBBER aren't really insns, so just skip them. */
22533 if (GET_CODE (body) == USE
22534 || GET_CODE (body) == CLOBBER)
22535 continue;
22537 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22538 if (GET_CODE (body) != COND_EXEC)
22539 break;
22540 /* Maximum number of conditionally executed instructions in a block. */
22541 n = get_attr_ce_count (insn);
22542 if (arm_condexec_masklen + n > max)
22543 break;
22545 predicate = COND_EXEC_TEST (body);
22546 code = get_arm_condition_code (predicate);
22547 mask = (1 << n) - 1;
22548 if (arm_current_cc == code)
22549 arm_condexec_mask |= (mask << arm_condexec_masklen);
22550 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22551 break;
22553 arm_condexec_count++;
22554 arm_condexec_masklen += n;
22556 /* A jump must be the last instruction in a conditional block. */
22557 if (JUMP_P (insn))
22558 break;
22560 /* Restore recog_data (getting the attributes of other insns can
22561 destroy this array, but final.c assumes that it remains intact
22562 across this call). */
22563 extract_constrain_insn_cached (first_insn);
22566 void
22567 arm_final_prescan_insn (rtx_insn *insn)
22569 /* BODY will hold the body of INSN. */
22570 rtx body = PATTERN (insn);
22572 /* This will be 1 if trying to repeat the trick, and things need to be
22573 reversed if it appears to fail. */
22574 int reverse = 0;
22576 /* If we start with a return insn, we only succeed if we find another one. */
22577 int seeking_return = 0;
22578 enum rtx_code return_code = UNKNOWN;
22580 /* START_INSN will hold the insn from where we start looking. This is the
22581 first insn after the following code_label if REVERSE is true. */
22582 rtx_insn *start_insn = insn;
22584 /* If in state 4, check if the target branch is reached, in order to
22585 change back to state 0. */
22586 if (arm_ccfsm_state == 4)
22588 if (insn == arm_target_insn)
22590 arm_target_insn = NULL;
22591 arm_ccfsm_state = 0;
22593 return;
22596 /* If in state 3, it is possible to repeat the trick, if this insn is an
22597 unconditional branch to a label, and immediately following this branch
22598 is the previous target label which is only used once, and the label this
22599 branch jumps to is not too far off. */
22600 if (arm_ccfsm_state == 3)
22602 if (simplejump_p (insn))
22604 start_insn = next_nonnote_insn (start_insn);
22605 if (BARRIER_P (start_insn))
22607 /* XXX Isn't this always a barrier? */
22608 start_insn = next_nonnote_insn (start_insn);
22610 if (LABEL_P (start_insn)
22611 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22612 && LABEL_NUSES (start_insn) == 1)
22613 reverse = TRUE;
22614 else
22615 return;
22617 else if (ANY_RETURN_P (body))
22619 start_insn = next_nonnote_insn (start_insn);
22620 if (BARRIER_P (start_insn))
22621 start_insn = next_nonnote_insn (start_insn);
22622 if (LABEL_P (start_insn)
22623 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22624 && LABEL_NUSES (start_insn) == 1)
22626 reverse = TRUE;
22627 seeking_return = 1;
22628 return_code = GET_CODE (body);
22630 else
22631 return;
22633 else
22634 return;
22637 gcc_assert (!arm_ccfsm_state || reverse);
22638 if (!JUMP_P (insn))
22639 return;
22641 /* This jump might be paralleled with a clobber of the condition codes;
22642 the jump should always come first.  */
22643 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22644 body = XVECEXP (body, 0, 0);
22646 if (reverse
22647 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22648 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22650 int insns_skipped;
22651 int fail = FALSE, succeed = FALSE;
22652 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22653 int then_not_else = TRUE;
22654 rtx_insn *this_insn = start_insn;
22655 rtx label = 0;
22657 /* Register the insn jumped to. */
22658 if (reverse)
22660 if (!seeking_return)
22661 label = XEXP (SET_SRC (body), 0);
22663 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22664 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22665 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22667 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22668 then_not_else = FALSE;
22670 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22672 seeking_return = 1;
22673 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22675 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22677 seeking_return = 1;
22678 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22679 then_not_else = FALSE;
22681 else
22682 gcc_unreachable ();
22684 /* See how many insns this branch skips, and what kind of insns. If all
22685 insns are okay, and the label or unconditional branch to the same
22686 label is not too far away, succeed. */
22687 for (insns_skipped = 0;
22688 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22690 rtx scanbody;
22692 this_insn = next_nonnote_insn (this_insn);
22693 if (!this_insn)
22694 break;
22696 switch (GET_CODE (this_insn))
22698 case CODE_LABEL:
22699 /* Succeed if it is the target label, otherwise fail since
22700 control falls in from somewhere else. */
22701 if (this_insn == label)
22703 arm_ccfsm_state = 1;
22704 succeed = TRUE;
22706 else
22707 fail = TRUE;
22708 break;
22710 case BARRIER:
22711 /* Succeed if the following insn is the target label.
22712 Otherwise fail.
22713 If return insns are used then the last insn in a function
22714 will be a barrier. */
22715 this_insn = next_nonnote_insn (this_insn);
22716 if (this_insn && this_insn == label)
22718 arm_ccfsm_state = 1;
22719 succeed = TRUE;
22721 else
22722 fail = TRUE;
22723 break;
22725 case CALL_INSN:
22726 /* The AAPCS says that conditional calls should not be
22727 used since they make interworking inefficient (the
22728 linker can't transform BL<cond> into BLX). That's
22729 only a problem if the machine has BLX. */
22730 if (arm_arch5)
22732 fail = TRUE;
22733 break;
22736 /* Succeed if the following insn is the target label, or
22737 if the following two insns are a barrier and the
22738 target label. */
22739 this_insn = next_nonnote_insn (this_insn);
22740 if (this_insn && BARRIER_P (this_insn))
22741 this_insn = next_nonnote_insn (this_insn);
22743 if (this_insn && this_insn == label
22744 && insns_skipped < max_insns_skipped)
22746 arm_ccfsm_state = 1;
22747 succeed = TRUE;
22749 else
22750 fail = TRUE;
22751 break;
22753 case JUMP_INSN:
22754 /* If this is an unconditional branch to the same label, succeed.
22755 If it is to another label, do nothing. If it is conditional,
22756 fail. */
22757 /* XXX Probably, the tests for SET and the PC are
22758 unnecessary. */
22760 scanbody = PATTERN (this_insn);
22761 if (GET_CODE (scanbody) == SET
22762 && GET_CODE (SET_DEST (scanbody)) == PC)
22764 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22765 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22767 arm_ccfsm_state = 2;
22768 succeed = TRUE;
22770 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22771 fail = TRUE;
22773 /* Fail if a conditional return is undesirable (e.g. on a
22774 StrongARM), but still allow this if optimizing for size. */
22775 else if (GET_CODE (scanbody) == return_code
22776 && !use_return_insn (TRUE, NULL)
22777 && !optimize_size)
22778 fail = TRUE;
22779 else if (GET_CODE (scanbody) == return_code)
22781 arm_ccfsm_state = 2;
22782 succeed = TRUE;
22784 else if (GET_CODE (scanbody) == PARALLEL)
22786 switch (get_attr_conds (this_insn))
22788 case CONDS_NOCOND:
22789 break;
22790 default:
22791 fail = TRUE;
22792 break;
22795 else
22796 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22798 break;
22800 case INSN:
22801 /* Instructions using or affecting the condition codes make it
22802 fail. */
22803 scanbody = PATTERN (this_insn);
22804 if (!(GET_CODE (scanbody) == SET
22805 || GET_CODE (scanbody) == PARALLEL)
22806 || get_attr_conds (this_insn) != CONDS_NOCOND)
22807 fail = TRUE;
22808 break;
22810 default:
22811 break;
22814 if (succeed)
22816 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22817 arm_target_label = CODE_LABEL_NUMBER (label);
22818 else
22820 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22822 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22824 this_insn = next_nonnote_insn (this_insn);
22825 gcc_assert (!this_insn
22826 || (!BARRIER_P (this_insn)
22827 && !LABEL_P (this_insn)));
22829 if (!this_insn)
22831 /* Oh, dear! We ran off the end; give up. */
22832 extract_constrain_insn_cached (insn);
22833 arm_ccfsm_state = 0;
22834 arm_target_insn = NULL;
22835 return;
22837 arm_target_insn = this_insn;
22840 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22841 what it was. */
22842 if (!reverse)
22843 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22845 if (reverse || then_not_else)
22846 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22849 /* Restore recog_data (getting the attributes of other insns can
22850 destroy this array, but final.c assumes that it remains intact
22851 across this call. */
22852 extract_constrain_insn_cached (insn);
22856 /* Output IT instructions. */
22857 void
22858 thumb2_asm_output_opcode (FILE * stream)
22860 char buff[5];
22861 int n;
22863 if (arm_condexec_mask)
22865 for (n = 0; n < arm_condexec_masklen; n++)
22866 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22867 buff[n] = 0;
22868 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22869 arm_condition_codes[arm_current_cc]);
22870 arm_condexec_mask = 0;
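/* Illustrative example: with arm_current_cc == ARM_EQ, a mask of 0x3 and
   a masklen of 3, the loop above builds "tte" and prints "itte\teq", so
   the next two instructions execute when EQ holds and the third when it
   does not.  */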
22874 /* Returns true if REGNO is a valid register
22875 for holding a quantity of type MODE. */
22877 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22879 if (GET_MODE_CLASS (mode) == MODE_CC)
22880 return (regno == CC_REGNUM
22881 || (TARGET_HARD_FLOAT && TARGET_VFP
22882 && regno == VFPCC_REGNUM));
22884 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22885 return false;
22887 if (TARGET_THUMB1)
22888 /* For the Thumb we only allow values bigger than SImode in
22889 registers 0 - 6, so that there is always a second low
22890 register available to hold the upper part of the value.
22891 We probably ought to ensure that the register is the
22892 start of an even numbered register pair. */
22893 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22895 if (TARGET_HARD_FLOAT && TARGET_VFP
22896 && IS_VFP_REGNUM (regno))
22898 if (mode == SFmode || mode == SImode)
22899 return VFP_REGNO_OK_FOR_SINGLE (regno);
22901 if (mode == DFmode)
22902 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22904 /* VFP registers can hold HFmode values, but there is no point in
22905 putting them there unless we have hardware conversion insns. */
22906 if (mode == HFmode)
22907 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22909 if (TARGET_NEON)
22910 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22911 || (VALID_NEON_QREG_MODE (mode)
22912 && NEON_REGNO_OK_FOR_QUAD (regno))
22913 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22914 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22915 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22916 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22917 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22919 return FALSE;
22922 if (TARGET_REALLY_IWMMXT)
22924 if (IS_IWMMXT_GR_REGNUM (regno))
22925 return mode == SImode;
22927 if (IS_IWMMXT_REGNUM (regno))
22928 return VALID_IWMMXT_REG_MODE (mode);
22931 /* We allow almost any value to be stored in the general registers.
22932 Restrict doubleword quantities to even register pairs in ARM state
22933 so that we can use ldrd. Do not allow very large Neon structure
22934 opaque modes in general registers; they would use too many. */
22935 if (regno <= LAST_ARM_REGNUM)
22937 if (ARM_NUM_REGS (mode) > 4)
22938 return FALSE;
22940 if (TARGET_THUMB2)
22941 return TRUE;
22943 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22946 if (regno == FRAME_POINTER_REGNUM
22947 || regno == ARG_POINTER_REGNUM)
22948 /* We only allow integers in the fake hard registers. */
22949 return GET_MODE_CLASS (mode) == MODE_INT;
22951 return FALSE;
22954 /* Implement MODES_TIEABLE_P. */
22956 bool
22957 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
22959 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22960 return true;
22962 /* We specifically want to allow elements of "structure" modes to
22963 be tieable to the structure. This more general condition allows
22964 other rarer situations too. */
22965 if (TARGET_NEON
22966 && (VALID_NEON_DREG_MODE (mode1)
22967 || VALID_NEON_QREG_MODE (mode1)
22968 || VALID_NEON_STRUCT_MODE (mode1))
22969 && (VALID_NEON_DREG_MODE (mode2)
22970 || VALID_NEON_QREG_MODE (mode2)
22971 || VALID_NEON_STRUCT_MODE (mode2)))
22972 return true;
22974 return false;
22977 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22978 not used in arm mode. */
22980 enum reg_class
22981 arm_regno_class (int regno)
22983 if (regno == PC_REGNUM)
22984 return NO_REGS;
22986 if (TARGET_THUMB1)
22988 if (regno == STACK_POINTER_REGNUM)
22989 return STACK_REG;
22990 if (regno == CC_REGNUM)
22991 return CC_REG;
22992 if (regno < 8)
22993 return LO_REGS;
22994 return HI_REGS;
22997 if (TARGET_THUMB2 && regno < 8)
22998 return LO_REGS;
23000 if ( regno <= LAST_ARM_REGNUM
23001 || regno == FRAME_POINTER_REGNUM
23002 || regno == ARG_POINTER_REGNUM)
23003 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23005 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23006 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23008 if (IS_VFP_REGNUM (regno))
23010 if (regno <= D7_VFP_REGNUM)
23011 return VFP_D0_D7_REGS;
23012 else if (regno <= LAST_LO_VFP_REGNUM)
23013 return VFP_LO_REGS;
23014 else
23015 return VFP_HI_REGS;
23018 if (IS_IWMMXT_REGNUM (regno))
23019 return IWMMXT_REGS;
23021 if (IS_IWMMXT_GR_REGNUM (regno))
23022 return IWMMXT_GR_REGS;
23024 return NO_REGS;
23027 /* Handle a special case when computing the offset
23028 of an argument from the frame pointer. */
23030 arm_debugger_arg_offset (int value, rtx addr)
23032 rtx_insn *insn;
23034 /* We are only interested if dbxout_parms() failed to compute the offset. */
23035 if (value != 0)
23036 return 0;
23038 /* We can only cope with the case where the address is held in a register. */
23039 if (!REG_P (addr))
23040 return 0;
23042 /* If we are using the frame pointer to point at the argument, then
23043 an offset of 0 is correct. */
23044 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23045 return 0;
23047 /* If we are using the stack pointer to point at the
23048 argument, then an offset of 0 is correct. */
23049 /* ??? Check this is consistent with thumb2 frame layout. */
23050 if ((TARGET_THUMB || !frame_pointer_needed)
23051 && REGNO (addr) == SP_REGNUM)
23052 return 0;
23054 /* Oh dear. The argument is pointed to by a register rather
23055 than being held in a register, or being stored at a known
23056 offset from the frame pointer. Since GDB only understands
23057 those two kinds of argument we must translate the address
23058 held in the register into an offset from the frame pointer.
23059 We do this by searching through the insns for the function
23060 looking to see where this register gets its value. If the
23061 register is initialized from the frame pointer plus an offset
23062 then we are in luck and we can continue, otherwise we give up.
23064 This code is exercised by producing debugging information
23065 for a function with arguments like this:
23067 double func (double a, double b, int c, double d) {return d;}
23069 Without this code the stab for parameter 'd' will be set to
23070 an offset of 0 from the frame pointer, rather than 8. */
23072 /* The if() statement says:
23074 If the insn is a normal instruction
23075 and if the insn is setting the value in a register
23076 and if the register being set is the register holding the address of the argument
23077 and if the address is computed by an addition
23078 that involves adding to a register
23079 which is the frame pointer
23080 a constant integer
23082 then... */
23084 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23086 if ( NONJUMP_INSN_P (insn)
23087 && GET_CODE (PATTERN (insn)) == SET
23088 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23089 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23090 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23091 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23092 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23095 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23097 break;
23101 if (value == 0)
23103 debug_rtx (addr);
23104 warning (0, "unable to compute real location of stacked parameter");
23105 value = 8; /* XXX magic hack */
23108 return value;
23111 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23113 static const char *
23114 arm_invalid_parameter_type (const_tree t)
23116 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23117 return N_("function parameters cannot have __fp16 type");
23118 return NULL;
23121 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23123 static const char *
23124 arm_invalid_return_type (const_tree t)
23126 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23127 return N_("functions cannot return __fp16 type");
23128 return NULL;
23131 /* Implement TARGET_PROMOTED_TYPE. */
23133 static tree
23134 arm_promoted_type (const_tree t)
23136 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23137 return float_type_node;
23138 return NULL_TREE;
23141 /* Implement TARGET_CONVERT_TO_TYPE.
23142 Specifically, this hook implements the peculiarity of the ARM
23143 half-precision floating-point C semantics that requires conversions between
23144 __fp16 to or from double to do an intermediate conversion to float. */
23146 static tree
23147 arm_convert_to_type (tree type, tree expr)
23149 tree fromtype = TREE_TYPE (expr);
23150 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23151 return NULL_TREE;
23152 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23153 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23154 return convert (type, convert (float_type_node, expr));
23155 return NULL_TREE;
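/* Illustrative consequence of the hook above: a conversion such as
   (__fp16) d, where d has type double, behaves as (__fp16)(float) d --
   the value is rounded to single precision first and then to half
   precision, which in rare double-rounding cases can differ from a
   hypothetical direct double-to-half conversion.  */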
23158 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23159 This simply adds HFmode as a supported mode; even though we don't
23160 implement arithmetic on this type directly, it's supported by
23161 optabs conversions, much the way the double-word arithmetic is
23162 special-cased in the default hook. */
23164 static bool
23165 arm_scalar_mode_supported_p (machine_mode mode)
23167 if (mode == HFmode)
23168 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23169 else if (ALL_FIXED_POINT_MODE_P (mode))
23170 return true;
23171 else
23172 return default_scalar_mode_supported_p (mode);
23175 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23176 void
23177 neon_reinterpret (rtx dest, rtx src)
23179 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23182 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23183 not to early-clobber SRC registers in the process.
23185 We assume that the operands described by SRC and DEST represent a
23186 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23187 number of components into which the copy has been decomposed. */
23188 void
23189 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23191 unsigned int i;
23193 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23194 || REGNO (operands[0]) < REGNO (operands[1]))
23196 for (i = 0; i < count; i++)
23198 operands[2 * i] = dest[i];
23199 operands[2 * i + 1] = src[i];
23202 else
23204 for (i = 0; i < count; i++)
23206 operands[2 * i] = dest[count - i - 1];
23207 operands[2 * i + 1] = src[count - i - 1];
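/* Example (illustrative): copying a two-register value from {d1,d2} to
   {d2,d3} overlaps and the destination starts at a higher register than
   the source, so the second loop above reverses the component order:
   d3 is written from d2 before d2 is overwritten from d1.  */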
23212 /* Split operands into moves from op[1] + op[2] into op[0]. */
23214 void
23215 neon_split_vcombine (rtx operands[3])
23217 unsigned int dest = REGNO (operands[0]);
23218 unsigned int src1 = REGNO (operands[1]);
23219 unsigned int src2 = REGNO (operands[2]);
23220 machine_mode halfmode = GET_MODE (operands[1]);
23221 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23222 rtx destlo, desthi;
23224 if (src1 == dest && src2 == dest + halfregs)
23226 /* No-op move. Can't split to nothing; emit something. */
23227 emit_note (NOTE_INSN_DELETED);
23228 return;
23231 /* Preserve register attributes for variable tracking. */
23232 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23233 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23234 GET_MODE_SIZE (halfmode));
23236 /* Special case of reversed high/low parts. Use VSWP. */
23237 if (src2 == dest && src1 == dest + halfregs)
23239 rtx x = gen_rtx_SET (destlo, operands[1]);
23240 rtx y = gen_rtx_SET (desthi, operands[2]);
23241 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23242 return;
23245 if (!reg_overlap_mentioned_p (operands[2], destlo))
23247 /* Try to avoid unnecessary moves if part of the result
23248 is in the right place already. */
23249 if (src1 != dest)
23250 emit_move_insn (destlo, operands[1]);
23251 if (src2 != dest + halfregs)
23252 emit_move_insn (desthi, operands[2]);
23254 else
23256 if (src2 != dest + halfregs)
23257 emit_move_insn (desthi, operands[2]);
23258 if (src1 != dest)
23259 emit_move_insn (destlo, operands[1]);
23263 /* Return the number (counting from 0) of
23264 the least significant set bit in MASK. */
23266 inline static int
23267 number_of_first_bit_set (unsigned mask)
23269 return ctz_hwi (mask);
23272 /* Like emit_multi_reg_push, but allowing for a different set of
23273 registers to be described as saved. MASK is the set of registers
23274 to be saved; REAL_REGS is the set of registers to be described as
23275 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23277 static rtx_insn *
23278 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23280 unsigned long regno;
23281 rtx par[10], tmp, reg;
23282 rtx_insn *insn;
23283 int i, j;
23285 /* Build the parallel of the registers actually being stored. */
23286 for (i = 0; mask; ++i, mask &= mask - 1)
23288 regno = ctz_hwi (mask);
23289 reg = gen_rtx_REG (SImode, regno);
23291 if (i == 0)
23292 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23293 else
23294 tmp = gen_rtx_USE (VOIDmode, reg);
23296 par[i] = tmp;
23299 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23300 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23301 tmp = gen_frame_mem (BLKmode, tmp);
23302 tmp = gen_rtx_SET (tmp, par[0]);
23303 par[0] = tmp;
23305 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23306 insn = emit_insn (tmp);
23308 /* Always build the stack adjustment note for unwind info. */
23309 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23310 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23311 par[0] = tmp;
23313 /* Build the parallel of the registers recorded as saved for unwind. */
23314 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23316 regno = ctz_hwi (real_regs);
23317 reg = gen_rtx_REG (SImode, regno);
23319 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23320 tmp = gen_frame_mem (SImode, tmp);
23321 tmp = gen_rtx_SET (tmp, reg);
23322 RTX_FRAME_RELATED_P (tmp) = 1;
23323 par[j + 1] = tmp;
23326 if (j == 0)
23327 tmp = par[0];
23328 else
23330 RTX_FRAME_RELATED_P (par[0]) = 1;
23331 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23334 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23336 return insn;
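/* Illustrative use: thumb1_emit_multi_reg_push (0x4010, 0x4010) emits a
   push of r4 and lr (printed later as "push\t{r4, lr}") and attaches a
   REG_FRAME_RELATED_EXPR note describing an 8-byte stack adjustment with
   r4 saved at [sp] and lr at [sp, #4].  */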
23339 /* Emit code to push or pop registers to or from the stack. F is the
23340 assembly file. MASK is the registers to pop. */
23341 static void
23342 thumb_pop (FILE *f, unsigned long mask)
23344 int regno;
23345 int lo_mask = mask & 0xFF;
23346 int pushed_words = 0;
23348 gcc_assert (mask);
23350 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23352 /* Special case. Do not generate a POP PC statement here, do it in
23353 thumb_exit() */
23354 thumb_exit (f, -1);
23355 return;
23358 fprintf (f, "\tpop\t{");
23360 /* Look at the low registers first. */
23361 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23363 if (lo_mask & 1)
23365 asm_fprintf (f, "%r", regno);
23367 if ((lo_mask & ~1) != 0)
23368 fprintf (f, ", ");
23370 pushed_words++;
23374 if (mask & (1 << PC_REGNUM))
23376 /* Catch popping the PC. */
23377 if (TARGET_INTERWORK || TARGET_BACKTRACE
23378 || crtl->calls_eh_return)
23380 /* The PC is never popped directly; instead
23381 it is popped into r3 and then BX is used.  */
23382 fprintf (f, "}\n");
23384 thumb_exit (f, -1);
23386 return;
23388 else
23390 if (mask & 0xFF)
23391 fprintf (f, ", ");
23393 asm_fprintf (f, "%r", PC_REGNUM);
23397 fprintf (f, "}\n");
23400 /* Generate code to return from a thumb function.
23401 If 'reg_containing_return_addr' is -1, then the return address is
23402 actually on the stack, at the stack pointer. */
23403 static void
23404 thumb_exit (FILE *f, int reg_containing_return_addr)
23406 unsigned regs_available_for_popping;
23407 unsigned regs_to_pop;
23408 int pops_needed;
23409 unsigned available;
23410 unsigned required;
23411 machine_mode mode;
23412 int size;
23413 int restore_a4 = FALSE;
23415 /* Compute the registers we need to pop. */
23416 regs_to_pop = 0;
23417 pops_needed = 0;
23419 if (reg_containing_return_addr == -1)
23421 regs_to_pop |= 1 << LR_REGNUM;
23422 ++pops_needed;
23425 if (TARGET_BACKTRACE)
23427 /* Restore the (ARM) frame pointer and stack pointer. */
23428 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23429 pops_needed += 2;
23432 /* If there is nothing to pop then just emit the BX instruction and
23433 return. */
23434 if (pops_needed == 0)
23436 if (crtl->calls_eh_return)
23437 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23439 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23440 return;
23442 /* Otherwise if we are not supporting interworking and we have not created
23443 a backtrace structure and the function was not entered in ARM mode then
23444 just pop the return address straight into the PC. */
23445 else if (!TARGET_INTERWORK
23446 && !TARGET_BACKTRACE
23447 && !is_called_in_ARM_mode (current_function_decl)
23448 && !crtl->calls_eh_return)
23450 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23451 return;
23454 /* Find out how many of the (return) argument registers we can corrupt. */
23455 regs_available_for_popping = 0;
23457 /* If returning via __builtin_eh_return, the bottom three registers
23458 all contain information needed for the return. */
23459 if (crtl->calls_eh_return)
23460 size = 12;
23461 else
23463 /* We can deduce the registers used from the function's
23464 return value. This is more reliable than examining
23465 df_regs_ever_live_p () because that will be set if the register is
23466 ever used in the function, not just if the register is used
23467 to hold a return value. */
23469 if (crtl->return_rtx != 0)
23470 mode = GET_MODE (crtl->return_rtx);
23471 else
23472 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23474 size = GET_MODE_SIZE (mode);
23476 if (size == 0)
23478 /* In a void function we can use any argument register.
23479 In a function that returns a structure on the stack
23480 we can use the second and third argument registers. */
23481 if (mode == VOIDmode)
23482 regs_available_for_popping =
23483 (1 << ARG_REGISTER (1))
23484 | (1 << ARG_REGISTER (2))
23485 | (1 << ARG_REGISTER (3));
23486 else
23487 regs_available_for_popping =
23488 (1 << ARG_REGISTER (2))
23489 | (1 << ARG_REGISTER (3));
23491 else if (size <= 4)
23492 regs_available_for_popping =
23493 (1 << ARG_REGISTER (2))
23494 | (1 << ARG_REGISTER (3));
23495 else if (size <= 8)
23496 regs_available_for_popping =
23497 (1 << ARG_REGISTER (3));
23500 /* Match registers to be popped with registers into which we pop them. */
23501 for (available = regs_available_for_popping,
23502 required = regs_to_pop;
23503 required != 0 && available != 0;
23504 available &= ~(available & - available),
23505 required &= ~(required & - required))
23506 -- pops_needed;
23508 /* If we have any popping registers left over, remove them. */
23509 if (available > 0)
23510 regs_available_for_popping &= ~available;
23512 /* Otherwise if we need another popping register we can use
23513 the fourth argument register. */
23514 else if (pops_needed)
23516 /* If we have not found any free argument registers and
23517 reg a4 contains the return address, we must move it. */
23518 if (regs_available_for_popping == 0
23519 && reg_containing_return_addr == LAST_ARG_REGNUM)
23521 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23522 reg_containing_return_addr = LR_REGNUM;
23524 else if (size > 12)
23526 /* Register a4 is being used to hold part of the return value,
23527 but we have dire need of a free, low register. */
23528 restore_a4 = TRUE;
23530 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23533 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23535 /* The fourth argument register is available. */
23536 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23538 --pops_needed;
23542 /* Pop as many registers as we can. */
23543 thumb_pop (f, regs_available_for_popping);
23545 /* Process the registers we popped. */
23546 if (reg_containing_return_addr == -1)
23548 /* The return address was popped into the lowest numbered register. */
23549 regs_to_pop &= ~(1 << LR_REGNUM);
23551 reg_containing_return_addr =
23552 number_of_first_bit_set (regs_available_for_popping);
23554 /* Remove this register from the mask of available registers, so that
23555 the return address will not be corrupted by further pops. */
23556 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23559 /* If we popped other registers then handle them here. */
23560 if (regs_available_for_popping)
23562 int frame_pointer;
23564 /* Work out which register currently contains the frame pointer. */
23565 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23567 /* Move it into the correct place. */
23568 asm_fprintf (f, "\tmov\t%r, %r\n",
23569 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23571 /* (Temporarily) remove it from the mask of popped registers. */
23572 regs_available_for_popping &= ~(1 << frame_pointer);
23573 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23575 if (regs_available_for_popping)
23577 int stack_pointer;
23579 /* We popped the stack pointer as well;
23580 find the register that contains it. */
23581 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23583 /* Move it into the stack register. */
23584 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23586 /* At this point we have popped all necessary registers, so
23587 do not worry about restoring regs_available_for_popping
23588 to its correct value:
23590 assert (pops_needed == 0)
23591 assert (regs_available_for_popping == (1 << frame_pointer))
23592 assert (regs_to_pop == (1 << STACK_POINTER)) */
23594 else
23596 /* Since we have just moved the popped value into the frame
23597 pointer, the popping register is available for reuse, and
23598 we know that we still have the stack pointer left to pop. */
23599 regs_available_for_popping |= (1 << frame_pointer);
23603 /* If we still have registers left on the stack, but we no longer have
23604 any registers into which we can pop them, then we must move the return
23605 address into the link register and make available the register that
23606 contained it. */
23607 if (regs_available_for_popping == 0 && pops_needed > 0)
23609 regs_available_for_popping |= 1 << reg_containing_return_addr;
23611 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23612 reg_containing_return_addr);
23614 reg_containing_return_addr = LR_REGNUM;
23617 /* If we have registers left on the stack then pop some more.
23618 We know that at most we will want to pop FP and SP. */
23619 if (pops_needed > 0)
23621 int popped_into;
23622 int move_to;
23624 thumb_pop (f, regs_available_for_popping);
23626 /* We have popped either FP or SP.
23627 Move whichever one it is into the correct register. */
23628 popped_into = number_of_first_bit_set (regs_available_for_popping);
23629 move_to = number_of_first_bit_set (regs_to_pop);
23631 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23633 regs_to_pop &= ~(1 << move_to);
23635 --pops_needed;
23638 /* If we still have not popped everything then we must have only
23639 had one register available to us and we are now popping the SP. */
23640 if (pops_needed > 0)
23642 int popped_into;
23644 thumb_pop (f, regs_available_for_popping);
23646 popped_into = number_of_first_bit_set (regs_available_for_popping);
23648 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23650 assert (regs_to_pop == (1 << STACK_POINTER))
23651 assert (pops_needed == 1)
23655 /* If necessary restore the a4 register. */
23656 if (restore_a4)
23658 if (reg_containing_return_addr != LR_REGNUM)
23660 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23661 reg_containing_return_addr = LR_REGNUM;
23664 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23667 if (crtl->calls_eh_return)
23668 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23670 /* Return to caller. */
23671 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23674 /* Scan INSN just before assembler is output for it.
23675 For Thumb-1, we track the status of the condition codes; this
23676 information is used in the cbranchsi4_insn pattern. */
23677 void
23678 thumb1_final_prescan_insn (rtx_insn *insn)
23680 if (flag_print_asm_name)
23681 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23682 INSN_ADDRESSES (INSN_UID (insn)));
23683 /* Don't overwrite the previous setter when we get to a cbranch. */
23684 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23686 enum attr_conds conds;
23688 if (cfun->machine->thumb1_cc_insn)
23690 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23691 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23692 CC_STATUS_INIT;
23694 conds = get_attr_conds (insn);
23695 if (conds == CONDS_SET)
23697 rtx set = single_set (insn);
23698 cfun->machine->thumb1_cc_insn = insn;
23699 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23700 cfun->machine->thumb1_cc_op1 = const0_rtx;
23701 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23702 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23704 rtx src1 = XEXP (SET_SRC (set), 1);
23705 if (src1 == const0_rtx)
23706 cfun->machine->thumb1_cc_mode = CCmode;
23708 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23710 /* Record the src register operand instead of dest because
23711 cprop_hardreg pass propagates src. */
23712 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23715 else if (conds != CONDS_NOCOND)
23716 cfun->machine->thumb1_cc_insn = NULL_RTX;
23719 /* Check if unexpected far jump is used. */
23720 if (cfun->machine->lr_save_eliminated
23721 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23722 internal_error("Unexpected thumb1 far jump");
23726 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23728 unsigned HOST_WIDE_INT mask = 0xff;
23729 int i;
23731 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23732 if (val == 0) /* XXX */
23733 return 0;
23735 for (i = 0; i < 25; i++)
23736 if ((val & (mask << i)) == val)
23737 return 1;
23739 return 0;
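/* Illustrative values (chosen for exposition): 0x00ff0000 is accepted because
   the 0xff mask shifted left by 16 covers all of its set bits, while
   0x00000101 is rejected because its set bits span more than eight contiguous
   bit positions.  */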
23742 /* Returns nonzero if the current function contains,
23743 or might contain a far jump. */
23744 static int
23745 thumb_far_jump_used_p (void)
23747 rtx_insn *insn;
23748 bool far_jump = false;
23749 unsigned int func_size = 0;
23751 /* This test is only important for leaf functions. */
23752 /* assert (!leaf_function_p ()); */
23754 /* If we have already decided that far jumps may be used,
23755 do not bother checking again, and always return true even if
23756 it turns out that they are not being used. Once we have made
23757 the decision that far jumps are present (and that hence the link
23758 register will be pushed onto the stack) we cannot go back on it. */
23759 if (cfun->machine->far_jump_used)
23760 return 1;
23762 /* If this function is not being called from the prologue/epilogue
23763 generation code then it must be being called from the
23764 INITIAL_ELIMINATION_OFFSET macro. */
23765 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23767 /* In this case we know that we are being asked about the elimination
23768 of the arg pointer register. If that register is not being used,
23769 then there are no arguments on the stack, and we do not have to
23770 worry that a far jump might force the prologue to push the link
23771 register, changing the stack offsets. In this case we can just
23772 return false, since the presence of far jumps in the function will
23773 not affect stack offsets.
23775 If the arg pointer is live (or if it was live, but has now been
23776 eliminated and so set to dead) then we do have to test to see if
23777 the function might contain a far jump. This test can lead to some
23778 false negatives, since before reload is completed, the length of
23779 branch instructions is not known, so gcc defaults to returning their
23780 longest length, which in turn sets the far jump attribute to true.
23782 A false negative will not result in bad code being generated, but it
23783 will result in a needless push and pop of the link register. We
23784 hope that this does not occur too often.
23786 If we need doubleword stack alignment this could affect the other
23787 elimination offsets so we can't risk getting it wrong. */
23788 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23789 cfun->machine->arg_pointer_live = 1;
23790 else if (!cfun->machine->arg_pointer_live)
23791 return 0;
23794 /* We should not change far_jump_used during or after reload, as there is
23795 no chance to change stack frame layout. */
23796 if (reload_in_progress || reload_completed)
23797 return 0;
23799 /* Check to see if the function contains a branch
23800 insn with the far jump attribute set. */
23801 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23803 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23805 far_jump = true;
23807 func_size += get_attr_length (insn);
23810 /* Attribute far_jump will always be true for thumb1 before
23811 shorten_branch pass. So checking the far_jump attribute before
23812 shorten_branch isn't very useful.
23814 The following heuristic tries to estimate more accurately whether a far
23815 jump may actually be needed. The heuristic is very conservative, as there
23816 is no chance to roll back the decision not to use a far jump.
23818 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
23819 that each 2-byte insn is associated with a 4-byte constant pool entry.
23820 Using function size 2048/3 as the threshold is conservative enough. */
23821 if (far_jump)
23823 if ((func_size * 3) >= 2048)
23825 /* Record the fact that we have decided that
23826 the function does use far jumps. */
23827 cfun->machine->far_jump_used = 1;
23828 return 1;
23832 return 0;
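/* A worked example of the heuristic above (sizes assumed): a 700-byte body
   containing a branch whose far_jump attribute is set gives 700 * 3 = 2100,
   which is >= 2048, so far_jump_used is recorded and the link register will
   be saved; a 600-byte body gives 1800 < 2048, so the branch is presumed to
   shrink to a short one once shorten_branches has run.  */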
23835 /* Return nonzero if FUNC must be entered in ARM mode. */
23837 is_called_in_ARM_mode (tree func)
23839 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23841 /* Ignore the problem about functions whose address is taken. */
23842 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23843 return TRUE;
23845 #ifdef ARM_PE
23846 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23847 #else
23848 return FALSE;
23849 #endif
23852 /* Given the stack offsets and register mask in OFFSETS, decide how
23853 many additional registers to push instead of subtracting a constant
23854 from SP. For epilogues the principle is the same except we use pop.
23855 FOR_PROLOGUE indicates which we're generating. */
23856 static int
23857 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23859 HOST_WIDE_INT amount;
23860 unsigned long live_regs_mask = offsets->saved_regs_mask;
23861 /* Extract a mask of the ones we can give to the Thumb's push/pop
23862 instruction. */
23863 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23864 /* Then count how many other high registers will need to be pushed. */
23865 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23866 int n_free, reg_base, size;
23868 if (!for_prologue && frame_pointer_needed)
23869 amount = offsets->locals_base - offsets->saved_regs;
23870 else
23871 amount = offsets->outgoing_args - offsets->saved_regs;
23873 /* If the stack frame size is 512 exactly, we can save one load
23874 instruction, which should make this a win even when optimizing
23875 for speed. */
23876 if (!optimize_size && amount != 512)
23877 return 0;
23879 /* Can't do this if there are high registers to push. */
23880 if (high_regs_pushed != 0)
23881 return 0;
23883 /* Shouldn't do it in the prologue if no registers would normally
23884 be pushed at all. In the epilogue, also allow it if we'll have
23885 a pop insn for the PC. */
23886 if (l_mask == 0
23887 && (for_prologue
23888 || TARGET_BACKTRACE
23889 || (live_regs_mask & 1 << LR_REGNUM) == 0
23890 || TARGET_INTERWORK
23891 || crtl->args.pretend_args_size != 0))
23892 return 0;
23894 /* Don't do this if thumb_expand_prologue wants to emit instructions
23895 between the push and the stack frame allocation. */
23896 if (for_prologue
23897 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23898 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23899 return 0;
23901 reg_base = 0;
23902 n_free = 0;
23903 if (!for_prologue)
23905 size = arm_size_return_regs ();
23906 reg_base = ARM_NUM_INTS (size);
23907 live_regs_mask >>= reg_base;
23910 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23911 && (for_prologue || call_used_regs[reg_base + n_free]))
23913 live_regs_mask >>= 1;
23914 n_free++;
23917 if (n_free == 0)
23918 return 0;
23919 gcc_assert (amount / 4 * 4 == amount);
23921 if (amount >= 512 && (amount - n_free * 4) < 512)
23922 return (amount - 508) / 4;
23923 if (amount <= n_free * 4)
23924 return amount / 4;
23925 return 0;
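/* A worked example (figures assumed): with a stack adjustment of exactly 512
   bytes, no high registers to save and two free low registers (n_free == 2),
   the test 512 - 2 * 4 < 512 succeeds and (512 - 508) / 4 == 1 extra register
   is pushed. The remaining adjustment is then 508 bytes, the largest
   immediate a single Thumb-1 SP subtract can encode.  */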
23928 /* The bits which aren't usefully expanded as rtl. */
23929 const char *
23930 thumb1_unexpanded_epilogue (void)
23932 arm_stack_offsets *offsets;
23933 int regno;
23934 unsigned long live_regs_mask = 0;
23935 int high_regs_pushed = 0;
23936 int extra_pop;
23937 int had_to_push_lr;
23938 int size;
23940 if (cfun->machine->return_used_this_function != 0)
23941 return "";
23943 if (IS_NAKED (arm_current_func_type ()))
23944 return "";
23946 offsets = arm_get_frame_offsets ();
23947 live_regs_mask = offsets->saved_regs_mask;
23948 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23950 /* We can deduce the registers used from the function's return value.
23951 This is more reliable than examining df_regs_ever_live_p () because that
23952 will be set if the register is ever used in the function, not just if
23953 the register is used to hold a return value. */
23954 size = arm_size_return_regs ();
23956 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23957 if (extra_pop > 0)
23959 unsigned long extra_mask = (1 << extra_pop) - 1;
23960 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23963 /* The prolog may have pushed some high registers to use as
23964 work registers. e.g. the testsuite file:
23965 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23966 compiles to produce:
23967 push {r4, r5, r6, r7, lr}
23968 mov r7, r9
23969 mov r6, r8
23970 push {r6, r7}
23971 as part of the prolog. We have to undo that pushing here. */
23973 if (high_regs_pushed)
23975 unsigned long mask = live_regs_mask & 0xff;
23976 int next_hi_reg;
23978 /* The available low registers depend on the size of the value we are
23979 returning. */
23980 if (size <= 12)
23981 mask |= 1 << 3;
23982 if (size <= 8)
23983 mask |= 1 << 2;
23985 if (mask == 0)
23986 /* Oh dear! We have no low registers into which we can pop
23987 high registers! */
23988 internal_error
23989 ("no low registers available for popping high registers");
23991 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23992 if (live_regs_mask & (1 << next_hi_reg))
23993 break;
23995 while (high_regs_pushed)
23997 /* Find lo register(s) into which the high register(s) can
23998 be popped. */
23999 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24001 if (mask & (1 << regno))
24002 high_regs_pushed--;
24003 if (high_regs_pushed == 0)
24004 break;
24007 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24009 /* Pop the values into the low register(s). */
24010 thumb_pop (asm_out_file, mask);
24012 /* Move the value(s) into the high registers. */
24013 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24015 if (mask & (1 << regno))
24017 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24018 regno);
24020 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24021 if (live_regs_mask & (1 << next_hi_reg))
24022 break;
24026 live_regs_mask &= ~0x0f00;
24029 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24030 live_regs_mask &= 0xff;
24032 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24034 /* Pop the return address into the PC. */
24035 if (had_to_push_lr)
24036 live_regs_mask |= 1 << PC_REGNUM;
24038 /* Either no argument registers were pushed or a backtrace
24039 structure was created which includes an adjusted stack
24040 pointer, so just pop everything. */
24041 if (live_regs_mask)
24042 thumb_pop (asm_out_file, live_regs_mask);
24044 /* We have either just popped the return address into the
24045 PC or it was kept in LR for the entire function.
24046 Note that thumb_pop has already called thumb_exit if the
24047 PC was in the list. */
24048 if (!had_to_push_lr)
24049 thumb_exit (asm_out_file, LR_REGNUM);
24051 else
24053 /* Pop everything but the return address. */
24054 if (live_regs_mask)
24055 thumb_pop (asm_out_file, live_regs_mask);
24057 if (had_to_push_lr)
24059 if (size > 12)
24061 /* We have no free low regs, so save one. */
24062 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24063 LAST_ARG_REGNUM);
24066 /* Get the return address into a temporary register. */
24067 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24069 if (size > 12)
24071 /* Move the return address to lr. */
24072 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24073 LAST_ARG_REGNUM);
24074 /* Restore the low register. */
24075 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24076 IP_REGNUM);
24077 regno = LR_REGNUM;
24079 else
24080 regno = LAST_ARG_REGNUM;
24082 else
24083 regno = LR_REGNUM;
24085 /* Remove the argument registers that were pushed onto the stack. */
24086 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24087 SP_REGNUM, SP_REGNUM,
24088 crtl->args.pretend_args_size);
24090 thumb_exit (asm_out_file, regno);
24093 return "";
24096 /* Functions to save and restore machine-specific function data. */
24097 static struct machine_function *
24098 arm_init_machine_status (void)
24100 struct machine_function *machine;
24101 machine = ggc_cleared_alloc<machine_function> ();
24103 #if ARM_FT_UNKNOWN != 0
24104 machine->func_type = ARM_FT_UNKNOWN;
24105 #endif
24106 return machine;
24109 /* Return an RTX indicating where the return address to the
24110 calling function can be found. */
24112 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24114 if (count != 0)
24115 return NULL_RTX;
24117 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24120 /* Do anything needed before RTL is emitted for each function. */
24121 void
24122 arm_init_expanders (void)
24124 /* Arrange to initialize and mark the machine per-function status. */
24125 init_machine_status = arm_init_machine_status;
24127 /* This is to stop the combine pass optimizing away the alignment
24128 adjustment of va_arg. */
24129 /* ??? It is claimed that this should not be necessary. */
24130 if (cfun)
24131 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24135 /* Like arm_compute_initial_elimination_offset. Simpler because there
24136 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24137 to point at the base of the local variables after static stack
24138 space for a function has been allocated. */
24140 HOST_WIDE_INT
24141 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24143 arm_stack_offsets *offsets;
24145 offsets = arm_get_frame_offsets ();
24147 switch (from)
24149 case ARG_POINTER_REGNUM:
24150 switch (to)
24152 case STACK_POINTER_REGNUM:
24153 return offsets->outgoing_args - offsets->saved_args;
24155 case FRAME_POINTER_REGNUM:
24156 return offsets->soft_frame - offsets->saved_args;
24158 case ARM_HARD_FRAME_POINTER_REGNUM:
24159 return offsets->saved_regs - offsets->saved_args;
24161 case THUMB_HARD_FRAME_POINTER_REGNUM:
24162 return offsets->locals_base - offsets->saved_args;
24164 default:
24165 gcc_unreachable ();
24167 break;
24169 case FRAME_POINTER_REGNUM:
24170 switch (to)
24172 case STACK_POINTER_REGNUM:
24173 return offsets->outgoing_args - offsets->soft_frame;
24175 case ARM_HARD_FRAME_POINTER_REGNUM:
24176 return offsets->saved_regs - offsets->soft_frame;
24178 case THUMB_HARD_FRAME_POINTER_REGNUM:
24179 return offsets->locals_base - offsets->soft_frame;
24181 default:
24182 gcc_unreachable ();
24184 break;
24186 default:
24187 gcc_unreachable ();
24191 /* Generate the function's prologue. */
24193 void
24194 thumb1_expand_prologue (void)
24196 rtx_insn *insn;
24198 HOST_WIDE_INT amount;
24199 arm_stack_offsets *offsets;
24200 unsigned long func_type;
24201 int regno;
24202 unsigned long live_regs_mask;
24203 unsigned long l_mask;
24204 unsigned high_regs_pushed = 0;
24206 func_type = arm_current_func_type ();
24208 /* Naked functions don't have prologues. */
24209 if (IS_NAKED (func_type))
24210 return;
24212 if (IS_INTERRUPT (func_type))
24214 error ("interrupt Service Routines cannot be coded in Thumb mode");
24215 return;
24218 if (is_called_in_ARM_mode (current_function_decl))
24219 emit_insn (gen_prologue_thumb1_interwork ());
24221 offsets = arm_get_frame_offsets ();
24222 live_regs_mask = offsets->saved_regs_mask;
24224 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24225 l_mask = live_regs_mask & 0x40ff;
24226 /* Then count how many other high registers will need to be pushed. */
24227 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24229 if (crtl->args.pretend_args_size)
24231 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24233 if (cfun->machine->uses_anonymous_args)
24235 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24236 unsigned long mask;
24238 mask = 1ul << (LAST_ARG_REGNUM + 1);
24239 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24241 insn = thumb1_emit_multi_reg_push (mask, 0);
24243 else
24245 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24246 stack_pointer_rtx, x));
24248 RTX_FRAME_RELATED_P (insn) = 1;
24251 if (TARGET_BACKTRACE)
24253 HOST_WIDE_INT offset = 0;
24254 unsigned work_register;
24255 rtx work_reg, x, arm_hfp_rtx;
24257 /* We have been asked to create a stack backtrace structure.
24258 The code looks like this:
24260 0 .align 2
24261 0 func:
24262 0 sub SP, #16 Reserve space for 4 registers.
24263 2 push {R7} Push low registers.
24264 4 add R7, SP, #20 Get the stack pointer before the push.
24265 6 str R7, [SP, #8] Store the stack pointer
24266 (before reserving the space).
24267 8 mov R7, PC Get hold of the start of this code + 12.
24268 10 str R7, [SP, #16] Store it.
24269 12 mov R7, FP Get hold of the current frame pointer.
24270 14 str R7, [SP, #4] Store it.
24271 16 mov R7, LR Get hold of the current return address.
24272 18 str R7, [SP, #12] Store it.
24273 20 add R7, SP, #16 Point at the start of the
24274 backtrace structure.
24275 22 mov FP, R7 Put this value into the frame pointer. */
24277 work_register = thumb_find_work_register (live_regs_mask);
24278 work_reg = gen_rtx_REG (SImode, work_register);
24279 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24282 stack_pointer_rtx, GEN_INT (-16)));
24283 RTX_FRAME_RELATED_P (insn) = 1;
24285 if (l_mask)
24287 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24288 RTX_FRAME_RELATED_P (insn) = 1;
24290 offset = bit_count (l_mask) * UNITS_PER_WORD;
24293 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24294 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24296 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24297 x = gen_frame_mem (SImode, x);
24298 emit_move_insn (x, work_reg);
24300 /* Make sure that the instruction fetching the PC is in the right place
24301 to calculate "start of backtrace creation code + 12". */
24302 /* ??? The stores using the common WORK_REG ought to be enough to
24303 prevent the scheduler from doing anything weird. Failing that
24304 we could always move all of the following into an UNSPEC_VOLATILE. */
24305 if (l_mask)
24307 x = gen_rtx_REG (SImode, PC_REGNUM);
24308 emit_move_insn (work_reg, x);
24310 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24311 x = gen_frame_mem (SImode, x);
24312 emit_move_insn (x, work_reg);
24314 emit_move_insn (work_reg, arm_hfp_rtx);
24316 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24317 x = gen_frame_mem (SImode, x);
24318 emit_move_insn (x, work_reg);
24320 else
24322 emit_move_insn (work_reg, arm_hfp_rtx);
24324 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24325 x = gen_frame_mem (SImode, x);
24326 emit_move_insn (x, work_reg);
24328 x = gen_rtx_REG (SImode, PC_REGNUM);
24329 emit_move_insn (work_reg, x);
24331 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24332 x = gen_frame_mem (SImode, x);
24333 emit_move_insn (x, work_reg);
24336 x = gen_rtx_REG (SImode, LR_REGNUM);
24337 emit_move_insn (work_reg, x);
24339 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24340 x = gen_frame_mem (SImode, x);
24341 emit_move_insn (x, work_reg);
24343 x = GEN_INT (offset + 12);
24344 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24346 emit_move_insn (arm_hfp_rtx, work_reg);
24348 /* Optimization: If we are not pushing any low registers but we are going
24349 to push some high registers then delay our first push. This will just
24350 be a push of LR and we can combine it with the push of the first high
24351 register. */
24352 else if ((l_mask & 0xff) != 0
24353 || (high_regs_pushed == 0 && l_mask))
24355 unsigned long mask = l_mask;
24356 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24357 insn = thumb1_emit_multi_reg_push (mask, mask);
24358 RTX_FRAME_RELATED_P (insn) = 1;
24361 if (high_regs_pushed)
24363 unsigned pushable_regs;
24364 unsigned next_hi_reg;
24365 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24366 : crtl->args.info.nregs;
24367 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24369 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24370 if (live_regs_mask & (1 << next_hi_reg))
24371 break;
24373 /* Here we need to mask out registers used for passing arguments
24374 even if they can be pushed. This is to avoid using them to stash the
24375 high registers; such stashing may clobber argument values. */
24376 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24378 if (pushable_regs == 0)
24379 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24381 while (high_regs_pushed > 0)
24383 unsigned long real_regs_mask = 0;
24385 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24387 if (pushable_regs & (1 << regno))
24389 emit_move_insn (gen_rtx_REG (SImode, regno),
24390 gen_rtx_REG (SImode, next_hi_reg));
24392 high_regs_pushed --;
24393 real_regs_mask |= (1 << next_hi_reg);
24395 if (high_regs_pushed)
24397 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24398 next_hi_reg --)
24399 if (live_regs_mask & (1 << next_hi_reg))
24400 break;
24402 else
24404 pushable_regs &= ~((1 << regno) - 1);
24405 break;
24410 /* If we had to find a work register and we have not yet
24411 saved the LR then add it to the list of regs to push. */
24412 if (l_mask == (1 << LR_REGNUM))
24414 pushable_regs |= l_mask;
24415 real_regs_mask |= l_mask;
24416 l_mask = 0;
24419 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24420 RTX_FRAME_RELATED_P (insn) = 1;
24424 /* Load the pic register before setting the frame pointer,
24425 so we can use r7 as a temporary work register. */
24426 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24427 arm_load_pic_register (live_regs_mask);
24429 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24430 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24431 stack_pointer_rtx);
24433 if (flag_stack_usage_info)
24434 current_function_static_stack_size
24435 = offsets->outgoing_args - offsets->saved_args;
24437 amount = offsets->outgoing_args - offsets->saved_regs;
24438 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24439 if (amount)
24441 if (amount < 512)
24443 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24444 GEN_INT (- amount)));
24445 RTX_FRAME_RELATED_P (insn) = 1;
24447 else
24449 rtx reg, dwarf;
24451 /* The stack decrement is too big for an immediate value in a single
24452 insn. In theory we could issue multiple subtracts, but after
24453 three of them it becomes more space efficient to place the full
24454 value in the constant pool and load into a register. (Also the
24455 ARM debugger really likes to see only one stack decrement per
24456 function). So instead we look for a scratch register into which
24457 we can load the decrement, and then we subtract this from the
24458 stack pointer. Unfortunately on the thumb the only available
24459 scratch registers are the argument registers, and we cannot use
24460 these as they may hold arguments to the function. Instead we
24461 attempt to locate a call preserved register which is used by this
24462 function. If we can find one, then we know that it will have
24463 been pushed at the start of the prologue and so we can corrupt
24464 it now. */
24465 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24466 if (live_regs_mask & (1 << regno))
24467 break;
24469 gcc_assert(regno <= LAST_LO_REGNUM);
24471 reg = gen_rtx_REG (SImode, regno);
24473 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24475 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24476 stack_pointer_rtx, reg));
24478 dwarf = gen_rtx_SET (stack_pointer_rtx,
24479 plus_constant (Pmode, stack_pointer_rtx,
24480 -amount));
24481 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24482 RTX_FRAME_RELATED_P (insn) = 1;
24486 if (frame_pointer_needed)
24487 thumb_set_frame_pointer (offsets);
24489 /* If we are profiling, make sure no instructions are scheduled before
24490 the call to mcount. Similarly if the user has requested no
24491 scheduling in the prolog. Similarly if we want non-call exceptions
24492 using the EABI unwinder, to prevent faulting instructions from being
24493 swapped with a stack adjustment. */
24494 if (crtl->profile || !TARGET_SCHED_PROLOG
24495 || (arm_except_unwind_info (&global_options) == UI_TARGET
24496 && cfun->can_throw_non_call_exceptions))
24497 emit_insn (gen_blockage ());
24499 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24500 if (live_regs_mask & 0xff)
24501 cfun->machine->lr_save_eliminated = 0;
24504 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24505 single POP instruction can be generated. LR should be replaced by PC. All
24506 the checks required are already done by USE_RETURN_INSN (). Hence,
24507 all we really need to check here is whether a single register or
24508 multiple registers are to be popped. */
24509 void
24510 thumb2_expand_return (bool simple_return)
24512 int i, num_regs;
24513 unsigned long saved_regs_mask;
24514 arm_stack_offsets *offsets;
24516 offsets = arm_get_frame_offsets ();
24517 saved_regs_mask = offsets->saved_regs_mask;
24519 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24520 if (saved_regs_mask & (1 << i))
24521 num_regs++;
24523 if (!simple_return && saved_regs_mask)
24525 if (num_regs == 1)
24527 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24528 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24529 rtx addr = gen_rtx_MEM (SImode,
24530 gen_rtx_POST_INC (SImode,
24531 stack_pointer_rtx));
24532 set_mem_alias_set (addr, get_frame_alias_set ());
24533 XVECEXP (par, 0, 0) = ret_rtx;
24534 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24535 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24536 emit_jump_insn (par);
24538 else
24540 saved_regs_mask &= ~ (1 << LR_REGNUM);
24541 saved_regs_mask |= (1 << PC_REGNUM);
24542 arm_emit_multi_reg_pop (saved_regs_mask);
24545 else
24547 emit_jump_insn (simple_return_rtx);
24551 void
24552 thumb1_expand_epilogue (void)
24554 HOST_WIDE_INT amount;
24555 arm_stack_offsets *offsets;
24556 int regno;
24558 /* Naked functions don't have epilogues. */
24559 if (IS_NAKED (arm_current_func_type ()))
24560 return;
24562 offsets = arm_get_frame_offsets ();
24563 amount = offsets->outgoing_args - offsets->saved_regs;
24565 if (frame_pointer_needed)
24567 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24568 amount = offsets->locals_base - offsets->saved_regs;
24570 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24572 gcc_assert (amount >= 0);
24573 if (amount)
24575 emit_insn (gen_blockage ());
24577 if (amount < 512)
24578 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24579 GEN_INT (amount)));
24580 else
24582 /* r3 is always free in the epilogue. */
24583 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24585 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24586 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24590 /* Emit a USE (stack_pointer_rtx), so that
24591 the stack adjustment will not be deleted. */
24592 emit_insn (gen_force_register_use (stack_pointer_rtx));
24594 if (crtl->profile || !TARGET_SCHED_PROLOG)
24595 emit_insn (gen_blockage ());
24597 /* Emit a clobber for each register that will be restored in the epilogue,
24598 so that flow2 will get register lifetimes correct. */
24599 for (regno = 0; regno < 13; regno++)
24600 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24601 emit_clobber (gen_rtx_REG (SImode, regno));
24603 if (! df_regs_ever_live_p (LR_REGNUM))
24604 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24607 /* Epilogue code for APCS frame. */
24608 static void
24609 arm_expand_epilogue_apcs_frame (bool really_return)
24611 unsigned long func_type;
24612 unsigned long saved_regs_mask;
24613 int num_regs = 0;
24614 int i;
24615 int floats_from_frame = 0;
24616 arm_stack_offsets *offsets;
24618 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24619 func_type = arm_current_func_type ();
24621 /* Get frame offsets for ARM. */
24622 offsets = arm_get_frame_offsets ();
24623 saved_regs_mask = offsets->saved_regs_mask;
24625 /* Find the offset of the floating-point save area in the frame. */
24626 floats_from_frame
24627 = (offsets->saved_args
24628 + arm_compute_static_chain_stack_bytes ()
24629 - offsets->frame);
24631 /* Compute how many core registers are saved and how far away the floats are. */
24632 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24633 if (saved_regs_mask & (1 << i))
24635 num_regs++;
24636 floats_from_frame += 4;
24639 if (TARGET_HARD_FLOAT && TARGET_VFP)
24641 int start_reg;
24642 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24644 /* The offset is from IP_REGNUM. */
24645 int saved_size = arm_get_vfp_saved_size ();
24646 if (saved_size > 0)
24648 rtx_insn *insn;
24649 floats_from_frame += saved_size;
24650 insn = emit_insn (gen_addsi3 (ip_rtx,
24651 hard_frame_pointer_rtx,
24652 GEN_INT (-floats_from_frame)));
24653 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24654 ip_rtx, hard_frame_pointer_rtx);
24657 /* Generate VFP register multi-pop. */
24658 start_reg = FIRST_VFP_REGNUM;
24660 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24661 /* Look for a case where a reg does not need restoring. */
24662 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24663 && (!df_regs_ever_live_p (i + 1)
24664 || call_used_regs[i + 1]))
24666 if (start_reg != i)
24667 arm_emit_vfp_multi_reg_pop (start_reg,
24668 (i - start_reg) / 2,
24669 gen_rtx_REG (SImode,
24670 IP_REGNUM));
24671 start_reg = i + 2;
24674 /* Restore the remaining regs that we have discovered (or possibly
24675 even all of them, if the conditional in the for loop never
24676 fired). */
24677 if (start_reg != i)
24678 arm_emit_vfp_multi_reg_pop (start_reg,
24679 (i - start_reg) / 2,
24680 gen_rtx_REG (SImode, IP_REGNUM));
24683 if (TARGET_IWMMXT)
24685 /* The frame pointer is guaranteed to be non-double-word aligned, as
24686 it is set to double-word-aligned old_stack_pointer - 4. */
24687 rtx_insn *insn;
24688 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24690 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24691 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24693 rtx addr = gen_frame_mem (V2SImode,
24694 plus_constant (Pmode, hard_frame_pointer_rtx,
24695 - lrm_count * 4));
24696 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24697 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24698 gen_rtx_REG (V2SImode, i),
24699 NULL_RTX);
24700 lrm_count += 2;
24704 /* saved_regs_mask should contain IP, which holds the old stack pointer
24705 from the time the activation record was created. Since SP and IP are
24706 adjacent registers, we can restore the value directly into SP. */
24707 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24708 saved_regs_mask &= ~(1 << IP_REGNUM);
24709 saved_regs_mask |= (1 << SP_REGNUM);
24711 /* There are two registers left in saved_regs_mask - LR and PC. We
24712 only need to restore LR (the return address), but to
24713 save time we can load it directly into PC, unless we need a
24714 special function exit sequence, or we are not really returning. */
24715 if (really_return
24716 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24717 && !crtl->calls_eh_return)
24718 /* Delete LR from the register mask, so that LR on
24719 the stack is loaded into the PC in the register mask. */
24720 saved_regs_mask &= ~(1 << LR_REGNUM);
24721 else
24722 saved_regs_mask &= ~(1 << PC_REGNUM);
24724 num_regs = bit_count (saved_regs_mask);
24725 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24727 rtx_insn *insn;
24728 emit_insn (gen_blockage ());
24729 /* Unwind the stack to just below the saved registers. */
24730 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24731 hard_frame_pointer_rtx,
24732 GEN_INT (- 4 * num_regs)));
24734 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24735 stack_pointer_rtx, hard_frame_pointer_rtx);
24738 arm_emit_multi_reg_pop (saved_regs_mask);
24740 if (IS_INTERRUPT (func_type))
24742 /* Interrupt handlers will have pushed the
24743 IP onto the stack, so restore it now. */
24744 rtx_insn *insn;
24745 rtx addr = gen_rtx_MEM (SImode,
24746 gen_rtx_POST_INC (SImode,
24747 stack_pointer_rtx));
24748 set_mem_alias_set (addr, get_frame_alias_set ());
24749 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24750 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24751 gen_rtx_REG (SImode, IP_REGNUM),
24752 NULL_RTX);
24755 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24756 return;
24758 if (crtl->calls_eh_return)
24759 emit_insn (gen_addsi3 (stack_pointer_rtx,
24760 stack_pointer_rtx,
24761 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24763 if (IS_STACKALIGN (func_type))
24764 /* Restore the original stack pointer. Before prologue, the stack was
24765 realigned and the original stack pointer saved in r0. For details,
24766 see comment in arm_expand_prologue. */
24767 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24769 emit_jump_insn (simple_return_rtx);
24772 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24773 function is not a sibcall. */
24774 void
24775 arm_expand_epilogue (bool really_return)
24777 unsigned long func_type;
24778 unsigned long saved_regs_mask;
24779 int num_regs = 0;
24780 int i;
24781 int amount;
24782 arm_stack_offsets *offsets;
24784 func_type = arm_current_func_type ();
24786 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24787 let output_return_instruction take care of any instruction emission. */
24788 if (IS_NAKED (func_type)
24789 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24791 if (really_return)
24792 emit_jump_insn (simple_return_rtx);
24793 return;
24796 /* If we are throwing an exception, then we really must be doing a
24797 return, so we can't tail-call. */
24798 gcc_assert (!crtl->calls_eh_return || really_return);
24800 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24802 arm_expand_epilogue_apcs_frame (really_return);
24803 return;
24806 /* Get frame offsets for ARM. */
24807 offsets = arm_get_frame_offsets ();
24808 saved_regs_mask = offsets->saved_regs_mask;
24809 num_regs = bit_count (saved_regs_mask);
24811 if (frame_pointer_needed)
24813 rtx_insn *insn;
24814 /* Restore stack pointer if necessary. */
24815 if (TARGET_ARM)
24817 /* In ARM mode, frame pointer points to first saved register.
24818 Restore stack pointer to last saved register. */
24819 amount = offsets->frame - offsets->saved_regs;
24821 /* Force out any pending memory operations that reference stacked data
24822 before stack de-allocation occurs. */
24823 emit_insn (gen_blockage ());
24824 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24825 hard_frame_pointer_rtx,
24826 GEN_INT (amount)));
24827 arm_add_cfa_adjust_cfa_note (insn, amount,
24828 stack_pointer_rtx,
24829 hard_frame_pointer_rtx);
24831 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24832 deleted. */
24833 emit_insn (gen_force_register_use (stack_pointer_rtx));
24835 else
24837 /* In Thumb-2 mode, the frame pointer points to the last saved
24838 register. */
24839 amount = offsets->locals_base - offsets->saved_regs;
24840 if (amount)
24842 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24843 hard_frame_pointer_rtx,
24844 GEN_INT (amount)));
24845 arm_add_cfa_adjust_cfa_note (insn, amount,
24846 hard_frame_pointer_rtx,
24847 hard_frame_pointer_rtx);
24850 /* Force out any pending memory operations that reference stacked data
24851 before stack de-allocation occurs. */
24852 emit_insn (gen_blockage ());
24853 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24854 hard_frame_pointer_rtx));
24855 arm_add_cfa_adjust_cfa_note (insn, 0,
24856 stack_pointer_rtx,
24857 hard_frame_pointer_rtx);
24858 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24859 deleted. */
24860 emit_insn (gen_force_register_use (stack_pointer_rtx));
24863 else
24865 /* Pop off outgoing args and local frame to adjust stack pointer to
24866 last saved register. */
24867 amount = offsets->outgoing_args - offsets->saved_regs;
24868 if (amount)
24870 rtx_insn *tmp;
24871 /* Force out any pending memory operations that reference stacked data
24872 before stack de-allocation occurs. */
24873 emit_insn (gen_blockage ());
24874 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24875 stack_pointer_rtx,
24876 GEN_INT (amount)));
24877 arm_add_cfa_adjust_cfa_note (tmp, amount,
24878 stack_pointer_rtx, stack_pointer_rtx);
24879 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24880 not deleted. */
24881 emit_insn (gen_force_register_use (stack_pointer_rtx));
24885 if (TARGET_HARD_FLOAT && TARGET_VFP)
24887 /* Generate VFP register multi-pop. */
24888 int end_reg = LAST_VFP_REGNUM + 1;
24890 /* Scan the registers in reverse order. We need to match
24891 any groupings made in the prologue and generate matching
24892 vldm operations. The need to match groups is because,
24893 unlike pop, vldm can only do consecutive regs. */
24894 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24895 /* Look for a case where a reg does not need restoring. */
24896 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24897 && (!df_regs_ever_live_p (i + 1)
24898 || call_used_regs[i + 1]))
24900 /* Restore the regs discovered so far (from reg+2 to
24901 end_reg). */
24902 if (end_reg > i + 2)
24903 arm_emit_vfp_multi_reg_pop (i + 2,
24904 (end_reg - (i + 2)) / 2,
24905 stack_pointer_rtx);
24906 end_reg = i;
24909 /* Restore the remaining regs that we have discovered (or possibly
24910 even all of them, if the conditional in the for loop never
24911 fired). */
24912 if (end_reg > i + 2)
24913 arm_emit_vfp_multi_reg_pop (i + 2,
24914 (end_reg - (i + 2)) / 2,
24915 stack_pointer_rtx);
24918 if (TARGET_IWMMXT)
24919 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24920 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24922 rtx_insn *insn;
24923 rtx addr = gen_rtx_MEM (V2SImode,
24924 gen_rtx_POST_INC (SImode,
24925 stack_pointer_rtx));
24926 set_mem_alias_set (addr, get_frame_alias_set ());
24927 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24928 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24929 gen_rtx_REG (V2SImode, i),
24930 NULL_RTX);
24931 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24932 stack_pointer_rtx, stack_pointer_rtx);
24935 if (saved_regs_mask)
24937 rtx insn;
24938 bool return_in_pc = false;
24940 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24941 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24942 && !IS_STACKALIGN (func_type)
24943 && really_return
24944 && crtl->args.pretend_args_size == 0
24945 && saved_regs_mask & (1 << LR_REGNUM)
24946 && !crtl->calls_eh_return)
24948 saved_regs_mask &= ~(1 << LR_REGNUM);
24949 saved_regs_mask |= (1 << PC_REGNUM);
24950 return_in_pc = true;
24953 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24955 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24956 if (saved_regs_mask & (1 << i))
24958 rtx addr = gen_rtx_MEM (SImode,
24959 gen_rtx_POST_INC (SImode,
24960 stack_pointer_rtx));
24961 set_mem_alias_set (addr, get_frame_alias_set ());
24963 if (i == PC_REGNUM)
24965 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24966 XVECEXP (insn, 0, 0) = ret_rtx;
24967 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
24968 addr);
24969 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24970 insn = emit_jump_insn (insn);
24972 else
24974 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24975 addr));
24976 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24977 gen_rtx_REG (SImode, i),
24978 NULL_RTX);
24979 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24980 stack_pointer_rtx,
24981 stack_pointer_rtx);
24985 else
24987 if (TARGET_LDRD
24988 && current_tune->prefer_ldrd_strd
24989 && !optimize_function_for_size_p (cfun))
24991 if (TARGET_THUMB2)
24992 thumb2_emit_ldrd_pop (saved_regs_mask);
24993 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24994 arm_emit_ldrd_pop (saved_regs_mask);
24995 else
24996 arm_emit_multi_reg_pop (saved_regs_mask);
24998 else
24999 arm_emit_multi_reg_pop (saved_regs_mask);
25002 if (return_in_pc)
25003 return;
25006 if (crtl->args.pretend_args_size)
25008 int i, j;
25009 rtx dwarf = NULL_RTX;
25010 rtx_insn *tmp =
25011 emit_insn (gen_addsi3 (stack_pointer_rtx,
25012 stack_pointer_rtx,
25013 GEN_INT (crtl->args.pretend_args_size)));
25015 RTX_FRAME_RELATED_P (tmp) = 1;
25017 if (cfun->machine->uses_anonymous_args)
25019 /* Restore pretend args. Refer to arm_expand_prologue for how the
25020 pretend args are saved on the stack. */
25021 int num_regs = crtl->args.pretend_args_size / 4;
25022 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25023 for (j = 0, i = 0; j < num_regs; i++)
25024 if (saved_regs_mask & (1 << i))
25026 rtx reg = gen_rtx_REG (SImode, i);
25027 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25028 j++;
25030 REG_NOTES (tmp) = dwarf;
25032 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25033 stack_pointer_rtx, stack_pointer_rtx);
25036 if (!really_return)
25037 return;
25039 if (crtl->calls_eh_return)
25040 emit_insn (gen_addsi3 (stack_pointer_rtx,
25041 stack_pointer_rtx,
25042 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25044 if (IS_STACKALIGN (func_type))
25045 /* Restore the original stack pointer. Before prologue, the stack was
25046 realigned and the original stack pointer saved in r0. For details,
25047 see comment in arm_expand_prologue. */
25048 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25050 emit_jump_insn (simple_return_rtx);
25053 /* Implementation of insn prologue_thumb1_interwork. This is the first
25054 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25056 const char *
25057 thumb1_output_interwork (void)
25059 const char * name;
25060 FILE *f = asm_out_file;
25062 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25063 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25064 == SYMBOL_REF);
25065 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25067 /* Generate code sequence to switch us into Thumb mode. */
25068 /* The .code 32 directive has already been emitted by
25069 ASM_DECLARE_FUNCTION_NAME. */
25070 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25071 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25073 /* Generate a label, so that the debugger will notice the
25074 change in instruction sets. This label is also used by
25075 the assembler to bypass the ARM code when this function
25076 is called from a Thumb encoded function elsewhere in the
25077 same file. Hence the definition of STUB_NAME here must
25078 agree with the definition in gas/config/tc-arm.c. */
25080 #define STUB_NAME ".real_start_of"
25082 fprintf (f, "\t.code\t16\n");
25083 #ifdef ARM_PE
25084 if (arm_dllexport_name_p (name))
25085 name = arm_strip_name_encoding (name);
25086 #endif
25087 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25088 fprintf (f, "\t.thumb_func\n");
25089 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25091 return "";
25094 /* Handle the case of a double word load into a low register from
25095 a computed memory address. The computed address may involve a
25096 register which is overwritten by the load. */
25097 const char *
25098 thumb_load_double_from_address (rtx *operands)
25100 rtx addr;
25101 rtx base;
25102 rtx offset;
25103 rtx arg1;
25104 rtx arg2;
25106 gcc_assert (REG_P (operands[0]));
25107 gcc_assert (MEM_P (operands[1]));
25109 /* Get the memory address. */
25110 addr = XEXP (operands[1], 0);
25112 /* Work out how the memory address is computed. */
25113 switch (GET_CODE (addr))
25115 case REG:
25116 operands[2] = adjust_address (operands[1], SImode, 4);
25118 if (REGNO (operands[0]) == REGNO (addr))
25120 output_asm_insn ("ldr\t%H0, %2", operands);
25121 output_asm_insn ("ldr\t%0, %1", operands);
25123 else
25125 output_asm_insn ("ldr\t%0, %1", operands);
25126 output_asm_insn ("ldr\t%H0, %2", operands);
25128 break;
25130 case CONST:
25131 /* Compute <address> + 4 for the high order load. */
25132 operands[2] = adjust_address (operands[1], SImode, 4);
25134 output_asm_insn ("ldr\t%0, %1", operands);
25135 output_asm_insn ("ldr\t%H0, %2", operands);
25136 break;
25138 case PLUS:
25139 arg1 = XEXP (addr, 0);
25140 arg2 = XEXP (addr, 1);
25142 if (CONSTANT_P (arg1))
25143 base = arg2, offset = arg1;
25144 else
25145 base = arg1, offset = arg2;
25147 gcc_assert (REG_P (base));
25149 /* Catch the case of <address> = <reg> + <reg> */
25150 if (REG_P (offset))
25152 int reg_offset = REGNO (offset);
25153 int reg_base = REGNO (base);
25154 int reg_dest = REGNO (operands[0]);
25156 /* Add the base and offset registers together into the
25157 higher destination register. */
25158 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25159 reg_dest + 1, reg_base, reg_offset);
25161 /* Load the lower destination register from the address in
25162 the higher destination register. */
25163 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25164 reg_dest, reg_dest + 1);
25166 /* Load the higher destination register from its own address
25167 plus 4. */
25168 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25169 reg_dest + 1, reg_dest + 1);
25171 else
25173 /* Compute <address> + 4 for the high order load. */
25174 operands[2] = adjust_address (operands[1], SImode, 4);
25176 /* If the computed address is held in the low order register
25177 then load the high order register first, otherwise always
25178 load the low order register first. */
25179 if (REGNO (operands[0]) == REGNO (base))
25181 output_asm_insn ("ldr\t%H0, %2", operands);
25182 output_asm_insn ("ldr\t%0, %1", operands);
25184 else
25186 output_asm_insn ("ldr\t%0, %1", operands);
25187 output_asm_insn ("ldr\t%H0, %2", operands);
25190 break;
25192 case LABEL_REF:
25193 /* With no registers to worry about we can just load the value
25194 directly. */
25195 operands[2] = adjust_address (operands[1], SImode, 4);
25197 output_asm_insn ("ldr\t%H0, %2", operands);
25198 output_asm_insn ("ldr\t%0, %1", operands);
25199 break;
25201 default:
25202 gcc_unreachable ();
25205 return "";
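/* Example of the register-plus-register case (register numbers assumed): for
   a load of the pair r4/r5 from [r1 + r2], the code above emits
       add r5, r1, r2
       ldr r4, [r5, #0]
       ldr r5, [r5, #4]
   so the computed address survives in the high half of the destination pair
   until both words have been loaded.  */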
25208 const char *
25209 thumb_output_move_mem_multiple (int n, rtx *operands)
25211 rtx tmp;
25213 switch (n)
25215 case 2:
25216 if (REGNO (operands[4]) > REGNO (operands[5]))
25218 tmp = operands[4];
25219 operands[4] = operands[5];
25220 operands[5] = tmp;
25222 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25223 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25224 break;
25226 case 3:
25227 if (REGNO (operands[4]) > REGNO (operands[5]))
25228 std::swap (operands[4], operands[5]);
25229 if (REGNO (operands[5]) > REGNO (operands[6]))
25230 std::swap (operands[5], operands[6]);
25231 if (REGNO (operands[4]) > REGNO (operands[5]))
25232 std::swap (operands[4], operands[5]);
25234 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25235 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25236 break;
25238 default:
25239 gcc_unreachable ();
25242 return "";
25245 /* Output a call-via instruction for thumb state. */
25246 const char *
25247 thumb_call_via_reg (rtx reg)
25249 int regno = REGNO (reg);
25250 rtx *labelp;
25252 gcc_assert (regno < LR_REGNUM);
25254 /* If we are in the normal text section we can use a single instance
25255 per compilation unit. If we are doing function sections, then we need
25256 an entry per section, since we can't rely on reachability. */
25257 if (in_section == text_section)
25259 thumb_call_reg_needed = 1;
25261 if (thumb_call_via_label[regno] == NULL)
25262 thumb_call_via_label[regno] = gen_label_rtx ();
25263 labelp = thumb_call_via_label + regno;
25265 else
25267 if (cfun->machine->call_via[regno] == NULL)
25268 cfun->machine->call_via[regno] = gen_label_rtx ();
25269 labelp = cfun->machine->call_via + regno;
25272 output_asm_insn ("bl\t%a0", labelp);
25273 return "";
25276 /* Routines for generating rtl. */
25277 void
25278 thumb_expand_movmemqi (rtx *operands)
25280 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25281 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25282 HOST_WIDE_INT len = INTVAL (operands[2]);
25283 HOST_WIDE_INT offset = 0;
25285 while (len >= 12)
25287 emit_insn (gen_movmem12b (out, in, out, in));
25288 len -= 12;
25291 if (len >= 8)
25293 emit_insn (gen_movmem8b (out, in, out, in));
25294 len -= 8;
25297 if (len >= 4)
25299 rtx reg = gen_reg_rtx (SImode);
25300 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25301 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25302 len -= 4;
25303 offset += 4;
25306 if (len >= 2)
25308 rtx reg = gen_reg_rtx (HImode);
25309 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25310 plus_constant (Pmode, in,
25311 offset))));
25312 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25313 offset)),
25314 reg));
25315 len -= 2;
25316 offset += 2;
25319 if (len)
25321 rtx reg = gen_reg_rtx (QImode);
25322 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25323 plus_constant (Pmode, in,
25324 offset))));
25325 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25326 offset)),
25327 reg));
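/* Worked example (illustrative only): a 15-byte copy is decomposed by the
   code above into one 12-byte ldmia/stmia block move, then a halfword move
   at offset 0 and a byte move at offset 2 relative to the updated pointers
   (12 + 2 + 1 = 15).  */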
25331 void
25332 thumb_reload_out_hi (rtx *operands)
25334 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25337 /* Handle reading a half-word from memory during reload. */
25338 void
25339 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25341 gcc_unreachable ();
25344 /* Return the length of a function name prefix
25345 that starts with the character 'c'. */
25346 static int
25347 arm_get_strip_length (int c)
25349 switch (c)
25351 ARM_NAME_ENCODING_LENGTHS
25352 default: return 0;
25356 /* Return a pointer to a function's name with any
25357 and all prefix encodings stripped from it. */
25358 const char *
25359 arm_strip_name_encoding (const char *name)
25361 int skip;
25363 while ((skip = arm_get_strip_length (* name)))
25364 name += skip;
25366 return name;
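/* Example (assuming the usual ARM_NAME_ENCODING_LENGTHS definition, which
   maps a leading '*' to a skip of one character): a name such as "*foo"
   is returned as "foo", while a name with no recognised prefix character
   is returned unchanged.  */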
25369 /* If there is a '*' anywhere in the name's prefix, then
25370 emit the stripped name verbatim, otherwise prepend an
25371 underscore if leading underscores are being used. */
25372 void
25373 arm_asm_output_labelref (FILE *stream, const char *name)
25375 int skip;
25376 int verbatim = 0;
25378 while ((skip = arm_get_strip_length (* name)))
25380 verbatim |= (*name == '*');
25381 name += skip;
25384 if (verbatim)
25385 fputs (name, stream);
25386 else
25387 asm_fprintf (stream, "%U%s", name);
25390 /* This function is used to emit an EABI tag and its associated value.
25391 We emit the numerical value of the tag in case the assembler does not
25392 support textual tags. (E.g. gas prior to 2.20). If requested we include
25393 the tag name in a comment so that anyone reading the assembler output
25394 will know which tag is being set.
25396 This function is not static because arm-c.c needs it too. */
25398 void
25399 arm_emit_eabi_attribute (const char *name, int num, int val)
25401 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25402 if (flag_verbose_asm || flag_debug_asm)
25403 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25404 asm_fprintf (asm_out_file, "\n");
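/* Illustrative output (assuming ASM_COMMENT_START is "@"): a call such as
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2) emits

       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals

   where the trailing comment appears only under -fverbose-asm or -dA.  */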
25407 /* This function is used to print CPU tuning information as comment
25408 in assembler file. Pointers are not printed for now. */
25410 void
25411 arm_print_tune_info (void)
25413 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25414 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25415 current_tune->constant_limit);
25416 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25417 current_tune->max_insns_skipped);
25418 asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25419 current_tune->num_prefetch_slots);
25420 asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25421 current_tune->l1_cache_size);
25422 asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25423 current_tune->l1_cache_line_size);
25424 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25425 (int) current_tune->prefer_constant_pool);
25426 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25427 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25428 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25429 current_tune->branch_cost (false, false));
25430 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25431 current_tune->branch_cost (false, true));
25432 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25433 current_tune->branch_cost (true, false));
25434 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25435 current_tune->branch_cost (true, true));
25436 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25437 (int) current_tune->prefer_ldrd_strd);
25438 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25439 (int) current_tune->logical_op_non_short_circuit[0],
25440 (int) current_tune->logical_op_non_short_circuit[1]);
25441 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25442 (int) current_tune->prefer_neon_for_64bits);
25443 asm_fprintf (asm_out_file,
25444 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25445 (int) current_tune->disparage_flag_setting_t16_encodings);
25446 asm_fprintf (asm_out_file,
25447 "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25448 (int) current_tune
25449 ->disparage_partial_flag_setting_t16_encodings);
25450 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25451 (int) current_tune->string_ops_prefer_neon);
25452 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25453 current_tune->max_insns_inline_memset);
25454 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25455 current_tune->fuseable_ops);
25456 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25457 (int) current_tune->sched_autopref);
25460 static void
25461 arm_file_start (void)
25463 int val;
25465 if (TARGET_UNIFIED_ASM)
25466 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25468 if (TARGET_BPABI)
25470 const char *fpu_name;
25471 if (arm_selected_arch)
25473 /* armv7ve doesn't support any extensions. */
25474 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25476 /* Keep backward compatibility for assemblers
25477 which don't support armv7ve. */
25478 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25479 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25480 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25481 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25482 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25484 else
25486 const char* pos = strchr (arm_selected_arch->name, '+');
25487 if (pos)
25489 char buf[15];
25490 gcc_assert (strlen (arm_selected_arch->name)
25491 <= sizeof (buf) / sizeof (*pos));
25492 strncpy (buf, arm_selected_arch->name,
25493 (pos - arm_selected_arch->name) * sizeof (*pos));
25494 buf[pos - arm_selected_arch->name] = '\0';
25495 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25496 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25498 else
25499 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25502 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25503 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25504 else
25506 const char* truncated_name
25507 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25508 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25511 if (print_tune_info)
25512 arm_print_tune_info ();
25514 if (TARGET_SOFT_FLOAT)
25516 fpu_name = "softvfp";
25518 else
25520 fpu_name = arm_fpu_desc->name;
25521 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25523 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25524 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25526 if (TARGET_HARD_FLOAT_ABI)
25527 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25530 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25532 /* Some of these attributes only apply when the corresponding features
25533 are used. However we don't have any easy way of figuring this out.
25534 Conservatively record the setting that would have been used. */
25536 if (flag_rounding_math)
25537 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25539 if (!flag_unsafe_math_optimizations)
25541 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25542 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25544 if (flag_signaling_nans)
25545 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25547 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25548 flag_finite_math_only ? 1 : 3);
25550 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25551 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25552 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25553 flag_short_enums ? 1 : 2);
25555 /* Tag_ABI_optimization_goals. */
25556 if (optimize_size)
25557 val = 4;
25558 else if (optimize >= 2)
25559 val = 2;
25560 else if (optimize)
25561 val = 1;
25562 else
25563 val = 6;
25564 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25566 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25567 unaligned_access);
25569 if (arm_fp16_format)
25570 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25571 (int) arm_fp16_format);
25573 if (arm_lang_output_object_attributes_hook)
25574 arm_lang_output_object_attributes_hook();
25577 default_file_start ();
25580 static void
25581 arm_file_end (void)
25583 int regno;
25585 if (NEED_INDICATE_EXEC_STACK)
25586 /* Add .note.GNU-stack. */
25587 file_end_indicate_exec_stack ();
25589 if (! thumb_call_reg_needed)
25590 return;
25592 switch_to_section (text_section);
25593 asm_fprintf (asm_out_file, "\t.code 16\n");
25594 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25596 for (regno = 0; regno < LR_REGNUM; regno++)
25598 rtx label = thumb_call_via_label[regno];
25600 if (label != 0)
25602 targetm.asm_out.internal_label (asm_out_file, "L",
25603 CODE_LABEL_NUMBER (label));
25604 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25609 #ifndef ARM_PE
25610 /* Symbols in the text segment can be accessed without indirecting via the
25611 constant pool; it may take an extra binary operation, but this is still
25612 faster than indirecting via memory. Don't do this when not optimizing,
25613 since we won't be calculating all of the offsets necessary to do this
25614 simplification. */
25616 static void
25617 arm_encode_section_info (tree decl, rtx rtl, int first)
25619 if (optimize > 0 && TREE_CONSTANT (decl))
25620 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25622 default_encode_section_info (decl, rtl, first);
25624 #endif /* !ARM_PE */
25626 static void
25627 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25629 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25630 && !strcmp (prefix, "L"))
25632 arm_ccfsm_state = 0;
25633 arm_target_insn = NULL;
25635 default_internal_label (stream, prefix, labelno);
25638 /* Output code to add DELTA to the first argument, and then jump
25639 to FUNCTION. Used for C++ multiple inheritance. */
25640 static void
25641 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25642 HOST_WIDE_INT delta,
25643 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25644 tree function)
25646 static int thunk_label = 0;
25647 char label[256];
25648 char labelpc[256];
25649 int mi_delta = delta;
25650 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25651 int shift = 0;
25652 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25653 ? 1 : 0);
25654 if (mi_delta < 0)
25655 mi_delta = - mi_delta;
25657 final_start_function (emit_barrier (), file, 1);
25659 if (TARGET_THUMB1)
25661 int labelno = thunk_label++;
25662 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25663 /* Thunks are entered in ARM mode when available. */
25664 if (TARGET_THUMB1_ONLY)
25666 /* push r3 so we can use it as a temporary. */
25667 /* TODO: Omit this save if r3 is not used. */
25668 fputs ("\tpush {r3}\n", file);
25669 fputs ("\tldr\tr3, ", file);
25671 else
25673 fputs ("\tldr\tr12, ", file);
25675 assemble_name (file, label);
25676 fputc ('\n', file);
25677 if (flag_pic)
25679 /* If we are generating PIC, the ldr instruction below loads
25680 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25681 the address of the add + 8, so we have:
25683 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25684 = target + 1.
25686 Note that we have "+ 1" because some versions of GNU ld
25687 don't set the low bit of the result for R_ARM_REL32
25688 relocations against thumb function symbols.
25689 On ARMv6M this is +4, not +8. */
25690 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25691 assemble_name (file, labelpc);
25692 fputs (":\n", file);
25693 if (TARGET_THUMB1_ONLY)
25695 /* This is 2 insns after the start of the thunk, so we know it
25696 is 4-byte aligned. */
25697 fputs ("\tadd\tr3, pc, r3\n", file);
25698 fputs ("\tmov r12, r3\n", file);
25700 else
25701 fputs ("\tadd\tr12, pc, r12\n", file);
25703 else if (TARGET_THUMB1_ONLY)
25704 fputs ("\tmov r12, r3\n", file);
25706 if (TARGET_THUMB1_ONLY)
25708 if (mi_delta > 255)
25710 fputs ("\tldr\tr3, ", file);
25711 assemble_name (file, label);
25712 fputs ("+4\n", file);
25713 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25714 mi_op, this_regno, this_regno);
25716 else if (mi_delta != 0)
25718 /* Thumb1 unified syntax requires s suffix in instruction name when
25719 one of the operands is immediate. */
25720 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25721 mi_op, this_regno, this_regno,
25722 mi_delta);
25725 else
25727 /* TODO: Use movw/movt for large constants when available. */
25728 while (mi_delta != 0)
25730 if ((mi_delta & (3 << shift)) == 0)
25731 shift += 2;
25732 else
25734 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25735 mi_op, this_regno, this_regno,
25736 mi_delta & (0xff << shift));
25737 mi_delta &= ~(0xff << shift);
25738 shift += 8;
25742 if (TARGET_THUMB1)
25744 if (TARGET_THUMB1_ONLY)
25745 fputs ("\tpop\t{r3}\n", file);
25747 fprintf (file, "\tbx\tr12\n");
25748 ASM_OUTPUT_ALIGN (file, 2);
25749 assemble_name (file, label);
25750 fputs (":\n", file);
25751 if (flag_pic)
25753 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25754 rtx tem = XEXP (DECL_RTL (function), 0);
25755 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25756 pipeline offset is four rather than eight. Adjust the offset
25757 accordingly. */
25758 tem = plus_constant (GET_MODE (tem), tem,
25759 TARGET_THUMB1_ONLY ? -3 : -7);
25760 tem = gen_rtx_MINUS (GET_MODE (tem),
25761 tem,
25762 gen_rtx_SYMBOL_REF (Pmode,
25763 ggc_strdup (labelpc)));
25764 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25766 else
25767 /* Output ".word .LTHUNKn". */
25768 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25770 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25771 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25773 else
25775 fputs ("\tb\t", file);
25776 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25777 if (NEED_PLT_RELOC)
25778 fputs ("(PLT)", file);
25779 fputc ('\n', file);
25782 final_end_function ();
25786 arm_emit_vector_const (FILE *file, rtx x)
25788 int i;
25789 const char * pattern;
25791 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25793 switch (GET_MODE (x))
25795 case V2SImode: pattern = "%08x"; break;
25796 case V4HImode: pattern = "%04x"; break;
25797 case V8QImode: pattern = "%02x"; break;
25798 default: gcc_unreachable ();
25801 fprintf (file, "0x");
25802 for (i = CONST_VECTOR_NUNITS (x); i--;)
25804 rtx element;
25806 element = CONST_VECTOR_ELT (x, i);
25807 fprintf (file, pattern, INTVAL (element));
25810 return 1;
25813 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25814 HFmode constant pool entries are actually loaded with ldr. */
25815 void
25816 arm_emit_fp16_const (rtx c)
25818 REAL_VALUE_TYPE r;
25819 long bits;
25821 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25822 bits = real_to_target (NULL, &r, HFmode);
25823 if (WORDS_BIG_ENDIAN)
25824 assemble_zeros (2);
25825 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25826 if (!WORDS_BIG_ENDIAN)
25827 assemble_zeros (2);
25830 const char *
25831 arm_output_load_gr (rtx *operands)
25833 rtx reg;
25834 rtx offset;
25835 rtx wcgr;
25836 rtx sum;
25838 if (!MEM_P (operands [1])
25839 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25840 || !REG_P (reg = XEXP (sum, 0))
25841 || !CONST_INT_P (offset = XEXP (sum, 1))
25842 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25843 return "wldrw%?\t%0, %1";
25845 /* Fix up an out-of-range load of a GR register. */
25846 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25847 wcgr = operands[0];
25848 operands[0] = reg;
25849 output_asm_insn ("ldr%?\t%0, %1", operands);
25851 operands[0] = wcgr;
25852 operands[1] = reg;
25853 output_asm_insn ("tmcr%?\t%0, %1", operands);
25854 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25856 return "";
25859 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25861 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25862 named arg and all anonymous args onto the stack.
25863 XXX I know the prologue shouldn't be pushing registers, but it is faster
25864 that way. */
25866 static void
25867 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25868 machine_mode mode,
25869 tree type,
25870 int *pretend_size,
25871 int second_time ATTRIBUTE_UNUSED)
25873 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25874 int nregs;
25876 cfun->machine->uses_anonymous_args = 1;
25877 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25879 nregs = pcum->aapcs_ncrn;
25880 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25881 nregs++;
25883 else
25884 nregs = pcum->nregs;
25886 if (nregs < NUM_ARG_REGS)
25887 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25890 /* We can't rely on the caller doing the proper promotion when
25891 using APCS or ATPCS. */
25893 static bool
25894 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25896 return !TARGET_AAPCS_BASED;
25899 static machine_mode
25900 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25901 machine_mode mode,
25902 int *punsignedp ATTRIBUTE_UNUSED,
25903 const_tree fntype ATTRIBUTE_UNUSED,
25904 int for_return ATTRIBUTE_UNUSED)
25906 if (GET_MODE_CLASS (mode) == MODE_INT
25907 && GET_MODE_SIZE (mode) < 4)
25908 return SImode;
25910 return mode;
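/* Effect in practice (illustrative): a char or short integer argument or
   return value (QImode/HImode, size < 4 bytes) is promoted to SImode, so it
   is passed and returned in a full 32-bit register; wider and non-integer
   modes are left untouched.  */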
25913 /* AAPCS based ABIs use short enums by default. */
25915 static bool
25916 arm_default_short_enums (void)
25918 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25922 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25924 static bool
25925 arm_align_anon_bitfield (void)
25927 return TARGET_AAPCS_BASED;
25931 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25933 static tree
25934 arm_cxx_guard_type (void)
25936 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25940 /* The EABI says test the least significant bit of a guard variable. */
25942 static bool
25943 arm_cxx_guard_mask_bit (void)
25945 return TARGET_AAPCS_BASED;
25949 /* The EABI specifies that all array cookies are 8 bytes long. */
25951 static tree
25952 arm_get_cookie_size (tree type)
25954 tree size;
25956 if (!TARGET_AAPCS_BASED)
25957 return default_cxx_get_cookie_size (type);
25959 size = build_int_cst (sizetype, 8);
25960 return size;
25964 /* The EABI says that array cookies should also contain the element size. */
25966 static bool
25967 arm_cookie_has_size (void)
25969 return TARGET_AAPCS_BASED;
25973 /* The EABI says constructors and destructors should return a pointer to
25974 the object constructed/destroyed. */
25976 static bool
25977 arm_cxx_cdtor_returns_this (void)
25979 return TARGET_AAPCS_BASED;
25982 /* The EABI says that an inline function may never be the key
25983 method. */
25985 static bool
25986 arm_cxx_key_method_may_be_inline (void)
25988 return !TARGET_AAPCS_BASED;
25991 static void
25992 arm_cxx_determine_class_data_visibility (tree decl)
25994 if (!TARGET_AAPCS_BASED
25995 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
25996 return;
25998 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
25999 is exported. However, on systems without dynamic vague linkage,
26000 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26001 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26002 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26003 else
26004 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26005 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26008 static bool
26009 arm_cxx_class_data_always_comdat (void)
26011 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26012 vague linkage if the class has no key function. */
26013 return !TARGET_AAPCS_BASED;
26017 /* The EABI says __aeabi_atexit should be used to register static
26018 destructors. */
26020 static bool
26021 arm_cxx_use_aeabi_atexit (void)
26023 return TARGET_AAPCS_BASED;
26027 void
26028 arm_set_return_address (rtx source, rtx scratch)
26030 arm_stack_offsets *offsets;
26031 HOST_WIDE_INT delta;
26032 rtx addr;
26033 unsigned long saved_regs;
26035 offsets = arm_get_frame_offsets ();
26036 saved_regs = offsets->saved_regs_mask;
26038 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26039 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26040 else
26042 if (frame_pointer_needed)
26043 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26044 else
26046 /* LR will be the first saved register. */
26047 delta = offsets->outgoing_args - (offsets->frame + 4);
26050 if (delta >= 4096)
26052 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26053 GEN_INT (delta & ~4095)));
26054 addr = scratch;
26055 delta &= 4095;
26057 else
26058 addr = stack_pointer_rtx;
26060 addr = plus_constant (Pmode, addr, delta);
26062 /* The store needs to be marked as frame related in order to prevent
26063 DSE from deleting it as dead if it is based on fp. */
26064 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26065 RTX_FRAME_RELATED_P (insn) = 1;
26066 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26071 void
26072 thumb_set_return_address (rtx source, rtx scratch)
26074 arm_stack_offsets *offsets;
26075 HOST_WIDE_INT delta;
26076 HOST_WIDE_INT limit;
26077 int reg;
26078 rtx addr;
26079 unsigned long mask;
26081 emit_use (source);
26083 offsets = arm_get_frame_offsets ();
26084 mask = offsets->saved_regs_mask;
26085 if (mask & (1 << LR_REGNUM))
26087 limit = 1024;
26088 /* Find the saved regs. */
26089 if (frame_pointer_needed)
26091 delta = offsets->soft_frame - offsets->saved_args;
26092 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26093 if (TARGET_THUMB1)
26094 limit = 128;
26096 else
26098 delta = offsets->outgoing_args - offsets->saved_args;
26099 reg = SP_REGNUM;
26101 /* Allow for the stack frame. */
26102 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26103 delta -= 16;
26104 /* The link register is always the first saved register. */
26105 delta -= 4;
26107 /* Construct the address. */
26108 addr = gen_rtx_REG (SImode, reg);
26109 if (delta > limit)
26111 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26112 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26113 addr = scratch;
26115 else
26116 addr = plus_constant (Pmode, addr, delta);
26118 /* The store needs to be marked as frame related in order to prevent
26119 DSE from deleting it as dead if it is based on fp. */
26120 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26121 RTX_FRAME_RELATED_P (insn) = 1;
26122 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26124 else
26125 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26128 /* Implements target hook vector_mode_supported_p. */
26129 bool
26130 arm_vector_mode_supported_p (machine_mode mode)
26132 /* Neon also supports V2SImode, etc. listed in the clause below. */
26133 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26134 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26135 return true;
26137 if ((TARGET_NEON || TARGET_IWMMXT)
26138 && ((mode == V2SImode)
26139 || (mode == V4HImode)
26140 || (mode == V8QImode)))
26141 return true;
26143 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26144 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26145 || mode == V2HAmode))
26146 return true;
26148 return false;
26151 /* Implements target hook array_mode_supported_p. */
26153 static bool
26154 arm_array_mode_supported_p (machine_mode mode,
26155 unsigned HOST_WIDE_INT nelems)
26157 if (TARGET_NEON
26158 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26159 && (nelems >= 2 && nelems <= 4))
26160 return true;
26162 return false;
26165 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26166 registers when autovectorizing for Neon, at least until multiple vector
26167 widths are supported properly by the middle-end. */
26169 static machine_mode
26170 arm_preferred_simd_mode (machine_mode mode)
26172 if (TARGET_NEON)
26173 switch (mode)
26175 case SFmode:
26176 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26177 case SImode:
26178 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26179 case HImode:
26180 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26181 case QImode:
26182 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26183 case DImode:
26184 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26185 return V2DImode;
26186 break;
26188 default:;
26191 if (TARGET_REALLY_IWMMXT)
26192 switch (mode)
26194 case SImode:
26195 return V2SImode;
26196 case HImode:
26197 return V4HImode;
26198 case QImode:
26199 return V8QImode;
26201 default:;
26204 return word_mode;
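/* Illustrative mapping under the default (quadword) Neon setting:
   SFmode -> V4SFmode, SImode -> V4SImode, HImode -> V8HImode,
   QImode -> V16QImode; with -mvectorize-with-neon-double the doubleword
   variants V2SF/V2SI/V4HI/V8QI are chosen instead.  */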
26207 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26209 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26210 using r0-r4 for function arguments, r7 for the stack frame and don't have
26211 enough left over to do doubleword arithmetic. For Thumb-2 all the
26212 potentially problematic instructions accept high registers so this is not
26213 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26214 that require many low registers. */
26215 static bool
26216 arm_class_likely_spilled_p (reg_class_t rclass)
26218 if ((TARGET_THUMB1 && rclass == LO_REGS)
26219 || rclass == CC_REG)
26220 return true;
26222 return false;
26225 /* Implements target hook small_register_classes_for_mode_p. */
26226 bool
26227 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26229 return TARGET_THUMB1;
26232 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26233 ARM insns and therefore guarantee that the shift count is modulo 256.
26234 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26235 guarantee no particular behavior for out-of-range counts. */
26237 static unsigned HOST_WIDE_INT
26238 arm_shift_truncation_mask (machine_mode mode)
26240 return mode == SImode ? 255 : 0;
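/* Example: returning 255 for SImode tells the middle end that a variable
   shift count is interpreted modulo 256, so a count of 260 behaves like a
   count of 4; for other modes we return 0 and promise nothing about
   out-of-range counts.  */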
26244 /* Map internal gcc register numbers to DWARF2 register numbers. */
26246 unsigned int
26247 arm_dbx_register_number (unsigned int regno)
26249 if (regno < 16)
26250 return regno;
26252 if (IS_VFP_REGNUM (regno))
26254 /* See comment in arm_dwarf_register_span. */
26255 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26256 return 64 + regno - FIRST_VFP_REGNUM;
26257 else
26258 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26261 if (IS_IWMMXT_GR_REGNUM (regno))
26262 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26264 if (IS_IWMMXT_REGNUM (regno))
26265 return 112 + regno - FIRST_IWMMXT_REGNUM;
26267 gcc_unreachable ();
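/* Illustrative mappings (derived from the code above): core registers r0-r15
   map to DWARF numbers 0-15; VFP registers in the single-precision-capable
   range map into the legacy 64-based block; higher D registers (d16 and
   above on VFPv3) map into the 256-based D-register block.  */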
26270 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26271 GCC models them as 64 32-bit registers, so we need to describe this to
26272 the DWARF generation code. Other registers can use the default. */
26273 static rtx
26274 arm_dwarf_register_span (rtx rtl)
26276 machine_mode mode;
26277 unsigned regno;
26278 rtx parts[16];
26279 int nregs;
26280 int i;
26282 regno = REGNO (rtl);
26283 if (!IS_VFP_REGNUM (regno))
26284 return NULL_RTX;
26286 /* XXX FIXME: The EABI defines two VFP register ranges:
26287 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26288 256-287: D0-D31
26289 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26290 corresponding D register. Until GDB supports this, we shall use the
26291 legacy encodings. We also use these encodings for D0-D15 for
26292 compatibility with older debuggers. */
26293 mode = GET_MODE (rtl);
26294 if (GET_MODE_SIZE (mode) < 8)
26295 return NULL_RTX;
26297 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26299 nregs = GET_MODE_SIZE (mode) / 4;
26300 for (i = 0; i < nregs; i += 2)
26301 if (TARGET_BIG_END)
26303 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26304 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26306 else
26308 parts[i] = gen_rtx_REG (SImode, regno + i);
26309 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26312 else
26314 nregs = GET_MODE_SIZE (mode) / 8;
26315 for (i = 0; i < nregs; i++)
26316 parts[i] = gen_rtx_REG (DImode, regno + i);
26319 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26322 #if ARM_UNWIND_INFO
26323 /* Emit unwind directives for a store-multiple instruction or stack pointer
26324 push during alignment.
26325 These should only ever be generated by the function prologue code, so
26326 expect them to have a particular form.
26327 The store-multiple instruction sometimes pushes pc as the last register,
26328 although it should not be tracked into unwind information, or for -Os
26329 sometimes pushes some dummy registers before the first register that needs
26330 to be tracked in unwind information; such dummy registers are there just
26331 to avoid separate stack adjustment, and will not be restored in the
26332 epilogue. */
26334 static void
26335 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26337 int i;
26338 HOST_WIDE_INT offset;
26339 HOST_WIDE_INT nregs;
26340 int reg_size;
26341 unsigned reg;
26342 unsigned lastreg;
26343 unsigned padfirst = 0, padlast = 0;
26344 rtx e;
26346 e = XVECEXP (p, 0, 0);
26347 gcc_assert (GET_CODE (e) == SET);
26349 /* First insn will adjust the stack pointer. */
26350 gcc_assert (GET_CODE (e) == SET
26351 && REG_P (SET_DEST (e))
26352 && REGNO (SET_DEST (e)) == SP_REGNUM
26353 && GET_CODE (SET_SRC (e)) == PLUS);
26355 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26356 nregs = XVECLEN (p, 0) - 1;
26357 gcc_assert (nregs);
26359 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26360 if (reg < 16)
26362 /* For -Os dummy registers can be pushed at the beginning to
26363 avoid separate stack pointer adjustment. */
26364 e = XVECEXP (p, 0, 1);
26365 e = XEXP (SET_DEST (e), 0);
26366 if (GET_CODE (e) == PLUS)
26367 padfirst = INTVAL (XEXP (e, 1));
26368 gcc_assert (padfirst == 0 || optimize_size);
26369 /* The function prologue may also push pc, but not annotate it as it is
26370 never restored. We turn this into a stack pointer adjustment. */
26371 e = XVECEXP (p, 0, nregs);
26372 e = XEXP (SET_DEST (e), 0);
26373 if (GET_CODE (e) == PLUS)
26374 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26375 else
26376 padlast = offset - 4;
26377 gcc_assert (padlast == 0 || padlast == 4);
26378 if (padlast == 4)
26379 fprintf (asm_out_file, "\t.pad #4\n");
26380 reg_size = 4;
26381 fprintf (asm_out_file, "\t.save {");
26383 else if (IS_VFP_REGNUM (reg))
26385 reg_size = 8;
26386 fprintf (asm_out_file, "\t.vsave {");
26388 else
26389 /* Unknown register type. */
26390 gcc_unreachable ();
26392 /* If the stack increment doesn't match the size of the saved registers,
26393 something has gone horribly wrong. */
26394 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26396 offset = padfirst;
26397 lastreg = 0;
26398 /* The remaining insns will describe the stores. */
26399 for (i = 1; i <= nregs; i++)
26401 /* Expect (set (mem <addr>) (reg)).
26402 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26403 e = XVECEXP (p, 0, i);
26404 gcc_assert (GET_CODE (e) == SET
26405 && MEM_P (SET_DEST (e))
26406 && REG_P (SET_SRC (e)));
26408 reg = REGNO (SET_SRC (e));
26409 gcc_assert (reg >= lastreg);
26411 if (i != 1)
26412 fprintf (asm_out_file, ", ");
26413 /* We can't use %r for vfp because we need to use the
26414 double precision register names. */
26415 if (IS_VFP_REGNUM (reg))
26416 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26417 else
26418 asm_fprintf (asm_out_file, "%r", reg);
26420 #ifdef ENABLE_CHECKING
26421 /* Check that the addresses are consecutive. */
26422 e = XEXP (SET_DEST (e), 0);
26423 if (GET_CODE (e) == PLUS)
26424 gcc_assert (REG_P (XEXP (e, 0))
26425 && REGNO (XEXP (e, 0)) == SP_REGNUM
26426 && CONST_INT_P (XEXP (e, 1))
26427 && offset == INTVAL (XEXP (e, 1)));
26428 else
26429 gcc_assert (i == 1
26430 && REG_P (e)
26431 && REGNO (e) == SP_REGNUM);
26432 offset += reg_size;
26433 #endif
26435 fprintf (asm_out_file, "}\n");
26436 if (padfirst)
26437 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26440 /* Emit unwind directives for a SET. */
26442 static void
26443 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26445 rtx e0;
26446 rtx e1;
26447 unsigned reg;
26449 e0 = XEXP (p, 0);
26450 e1 = XEXP (p, 1);
26451 switch (GET_CODE (e0))
26453 case MEM:
26454 /* Pushing a single register. */
26455 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26456 || !REG_P (XEXP (XEXP (e0, 0), 0))
26457 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26458 abort ();
26460 asm_fprintf (asm_out_file, "\t.save ");
26461 if (IS_VFP_REGNUM (REGNO (e1)))
26462 asm_fprintf(asm_out_file, "{d%d}\n",
26463 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26464 else
26465 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26466 break;
26468 case REG:
26469 if (REGNO (e0) == SP_REGNUM)
26471 /* A stack increment. */
26472 if (GET_CODE (e1) != PLUS
26473 || !REG_P (XEXP (e1, 0))
26474 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26475 || !CONST_INT_P (XEXP (e1, 1)))
26476 abort ();
26478 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26479 -INTVAL (XEXP (e1, 1)));
26481 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26483 HOST_WIDE_INT offset;
26485 if (GET_CODE (e1) == PLUS)
26487 if (!REG_P (XEXP (e1, 0))
26488 || !CONST_INT_P (XEXP (e1, 1)))
26489 abort ();
26490 reg = REGNO (XEXP (e1, 0));
26491 offset = INTVAL (XEXP (e1, 1));
26492 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26493 HARD_FRAME_POINTER_REGNUM, reg,
26494 offset);
26496 else if (REG_P (e1))
26498 reg = REGNO (e1);
26499 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26500 HARD_FRAME_POINTER_REGNUM, reg);
26502 else
26503 abort ();
26505 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26507 /* Move from sp to reg. */
26508 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26510 else if (GET_CODE (e1) == PLUS
26511 && REG_P (XEXP (e1, 0))
26512 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26513 && CONST_INT_P (XEXP (e1, 1)))
26515 /* Set reg to offset from sp. */
26516 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26517 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26519 else
26520 abort ();
26521 break;
26523 default:
26524 abort ();
26529 /* Emit unwind directives for the given insn. */
26531 static void
26532 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26534 rtx note, pat;
26535 bool handled_one = false;
26537 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26538 return;
26540 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26541 && (TREE_NOTHROW (current_function_decl)
26542 || crtl->all_throwers_are_sibcalls))
26543 return;
26545 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26546 return;
26548 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26550 switch (REG_NOTE_KIND (note))
26552 case REG_FRAME_RELATED_EXPR:
26553 pat = XEXP (note, 0);
26554 goto found;
26556 case REG_CFA_REGISTER:
26557 pat = XEXP (note, 0);
26558 if (pat == NULL)
26560 pat = PATTERN (insn);
26561 if (GET_CODE (pat) == PARALLEL)
26562 pat = XVECEXP (pat, 0, 0);
26565 /* Only emitted for IS_STACKALIGN re-alignment. */
26567 rtx dest, src;
26568 unsigned reg;
26570 src = SET_SRC (pat);
26571 dest = SET_DEST (pat);
26573 gcc_assert (src == stack_pointer_rtx);
26574 reg = REGNO (dest);
26575 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26576 reg + 0x90, reg);
26578 handled_one = true;
26579 break;
26581 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26582 to get correct dwarf information for shrink-wrap. We should not
26583 emit unwind information for it because these are used either for
26584 pretend arguments or notes to adjust sp and restore registers from
26585 stack. */
26586 case REG_CFA_DEF_CFA:
26587 case REG_CFA_ADJUST_CFA:
26588 case REG_CFA_RESTORE:
26589 return;
26591 case REG_CFA_EXPRESSION:
26592 case REG_CFA_OFFSET:
26593 /* ??? Only handling here what we actually emit. */
26594 gcc_unreachable ();
26596 default:
26597 break;
26600 if (handled_one)
26601 return;
26602 pat = PATTERN (insn);
26603 found:
26605 switch (GET_CODE (pat))
26607 case SET:
26608 arm_unwind_emit_set (asm_out_file, pat);
26609 break;
26611 case SEQUENCE:
26612 /* Store multiple. */
26613 arm_unwind_emit_sequence (asm_out_file, pat);
26614 break;
26616 default:
26617 abort();
26622 /* Output a reference from a function exception table to the type_info
26623 object X. The EABI specifies that the symbol should be relocated by
26624 an R_ARM_TARGET2 relocation. */
26626 static bool
26627 arm_output_ttype (rtx x)
26629 fputs ("\t.word\t", asm_out_file);
26630 output_addr_const (asm_out_file, x);
26631 /* Use special relocations for symbol references. */
26632 if (!CONST_INT_P (x))
26633 fputs ("(TARGET2)", asm_out_file);
26634 fputc ('\n', asm_out_file);
26636 return TRUE;
26639 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26641 static void
26642 arm_asm_emit_except_personality (rtx personality)
26644 fputs ("\t.personality\t", asm_out_file);
26645 output_addr_const (asm_out_file, personality);
26646 fputc ('\n', asm_out_file);
26649 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26651 static void
26652 arm_asm_init_sections (void)
26654 exception_section = get_unnamed_section (0, output_section_asm_op,
26655 "\t.handlerdata");
26657 #endif /* ARM_UNWIND_INFO */
26659 /* Output unwind directives for the start/end of a function. */
26661 void
26662 arm_output_fn_unwind (FILE * f, bool prologue)
26664 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26665 return;
26667 if (prologue)
26668 fputs ("\t.fnstart\n", f);
26669 else
26671 /* If this function will never be unwound, then mark it as such.
26672 The same condition is used in arm_unwind_emit to suppress
26673 the frame annotations. */
26674 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26675 && (TREE_NOTHROW (current_function_decl)
26676 || crtl->all_throwers_are_sibcalls))
26677 fputs("\t.cantunwind\n", f);
26679 fputs ("\t.fnend\n", f);
26683 static bool
26684 arm_emit_tls_decoration (FILE *fp, rtx x)
26686 enum tls_reloc reloc;
26687 rtx val;
26689 val = XVECEXP (x, 0, 0);
26690 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26692 output_addr_const (fp, val);
26694 switch (reloc)
26696 case TLS_GD32:
26697 fputs ("(tlsgd)", fp);
26698 break;
26699 case TLS_LDM32:
26700 fputs ("(tlsldm)", fp);
26701 break;
26702 case TLS_LDO32:
26703 fputs ("(tlsldo)", fp);
26704 break;
26705 case TLS_IE32:
26706 fputs ("(gottpoff)", fp);
26707 break;
26708 case TLS_LE32:
26709 fputs ("(tpoff)", fp);
26710 break;
26711 case TLS_DESCSEQ:
26712 fputs ("(tlsdesc)", fp);
26713 break;
26714 default:
26715 gcc_unreachable ();
26718 switch (reloc)
26720 case TLS_GD32:
26721 case TLS_LDM32:
26722 case TLS_IE32:
26723 case TLS_DESCSEQ:
26724 fputs (" + (. - ", fp);
26725 output_addr_const (fp, XVECEXP (x, 0, 2));
26726 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26727 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26728 output_addr_const (fp, XVECEXP (x, 0, 3));
26729 fputc (')', fp);
26730 break;
26731 default:
26732 break;
26735 return TRUE;
26738 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26740 static void
26741 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26743 gcc_assert (size == 4);
26744 fputs ("\t.word\t", file);
26745 output_addr_const (file, x);
26746 fputs ("(tlsldo)", file);
26749 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26751 static bool
26752 arm_output_addr_const_extra (FILE *fp, rtx x)
26754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26755 return arm_emit_tls_decoration (fp, x);
26756 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26758 char label[256];
26759 int labelno = INTVAL (XVECEXP (x, 0, 0));
26761 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26762 assemble_name_raw (fp, label);
26764 return TRUE;
26766 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26768 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26769 if (GOT_PCREL)
26770 fputs ("+.", fp);
26771 fputs ("-(", fp);
26772 output_addr_const (fp, XVECEXP (x, 0, 0));
26773 fputc (')', fp);
26774 return TRUE;
26776 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26778 output_addr_const (fp, XVECEXP (x, 0, 0));
26779 if (GOT_PCREL)
26780 fputs ("+.", fp);
26781 fputs ("-(", fp);
26782 output_addr_const (fp, XVECEXP (x, 0, 1));
26783 fputc (')', fp);
26784 return TRUE;
26786 else if (GET_CODE (x) == CONST_VECTOR)
26787 return arm_emit_vector_const (fp, x);
26789 return FALSE;
26792 /* Output assembly for a shift instruction.
26793 SET_FLAGS determines how the instruction modifies the condition codes.
26794 0 - Do not set condition codes.
26795 1 - Set condition codes.
26796 2 - Use smallest instruction. */
26797 const char *
26798 arm_output_shift(rtx * operands, int set_flags)
26800 char pattern[100];
26801 static const char flag_chars[3] = {'?', '.', '!'};
26802 const char *shift;
26803 HOST_WIDE_INT val;
26804 char c;
26806 c = flag_chars[set_flags];
26807 if (TARGET_UNIFIED_ASM)
26809 shift = shift_op(operands[3], &val);
26810 if (shift)
26812 if (val != -1)
26813 operands[2] = GEN_INT(val);
26814 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26816 else
26817 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26819 else
26820 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26821 output_asm_insn (pattern, operands);
26822 return "";
26825 /* Output assembly for a WMMX immediate shift instruction. */
26826 const char *
26827 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26829 int shift = INTVAL (operands[2]);
26830 char templ[50];
26831 machine_mode opmode = GET_MODE (operands[0]);
26833 gcc_assert (shift >= 0);
26835 /* Handle shift values that are out of range for the register versions:
26836 greater than 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
26837 if (((opmode == V4HImode) && (shift > 15))
26838 || ((opmode == V2SImode) && (shift > 31))
26839 || ((opmode == DImode) && (shift > 63)))
26841 if (wror_or_wsra)
26843 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26844 output_asm_insn (templ, operands);
26845 if (opmode == DImode)
26847 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26848 output_asm_insn (templ, operands);
26851 else
26853 /* The destination register will contain all zeros. */
26854 sprintf (templ, "wzero\t%%0");
26855 output_asm_insn (templ, operands);
26857 return "";
26860 if ((opmode == DImode) && (shift > 32))
26862 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26863 output_asm_insn (templ, operands);
26864 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26865 output_asm_insn (templ, operands);
26867 else
26869 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26870 output_asm_insn (templ, operands);
26872 return "";
26875 /* Output assembly for a WMMX tinsr instruction. */
26876 const char *
26877 arm_output_iwmmxt_tinsr (rtx *operands)
26879 int mask = INTVAL (operands[3]);
26880 int i;
26881 char templ[50];
26882 int units = mode_nunits[GET_MODE (operands[0])];
26883 gcc_assert ((mask & (mask - 1)) == 0);
26884 for (i = 0; i < units; ++i)
26886 if ((mask & 0x01) == 1)
26888 break;
26890 mask >>= 1;
26892 gcc_assert (i < units);
26894 switch (GET_MODE (operands[0]))
26896 case V8QImode:
26897 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26898 break;
26899 case V4HImode:
26900 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26901 break;
26902 case V2SImode:
26903 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26904 break;
26905 default:
26906 gcc_unreachable ();
26907 break;
26909 output_asm_insn (templ, operands);
26911 return "";
26914 /* Output a Thumb-1 casesi dispatch sequence. */
26915 const char *
26916 thumb1_output_casesi (rtx *operands)
26918 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26920 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26922 switch (GET_MODE(diff_vec))
26924 case QImode:
26925 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26926 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26927 case HImode:
26928 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26929 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26930 case SImode:
26931 return "bl\t%___gnu_thumb1_case_si";
26932 default:
26933 gcc_unreachable ();
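/* Example (illustrative): a dispatch over a QImode difference vector with
   unsigned offsets is compiled to a call to the libgcc helper
   __gnu_thumb1_case_uqi, as selected by the strings above.  */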
26937 /* Output a Thumb-2 casesi instruction. */
26938 const char *
26939 thumb2_output_casesi (rtx *operands)
26941 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26943 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26945 output_asm_insn ("cmp\t%0, %1", operands);
26946 output_asm_insn ("bhi\t%l3", operands);
26947 switch (GET_MODE(diff_vec))
26949 case QImode:
26950 return "tbb\t[%|pc, %0]";
26951 case HImode:
26952 return "tbh\t[%|pc, %0, lsl #1]";
26953 case SImode:
26954 if (flag_pic)
26956 output_asm_insn ("adr\t%4, %l2", operands);
26957 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26958 output_asm_insn ("add\t%4, %4, %5", operands);
26959 return "bx\t%4";
26961 else
26963 output_asm_insn ("adr\t%4, %l2", operands);
26964 return "ldr\t%|pc, [%4, %0, lsl #2]";
26966 default:
26967 gcc_unreachable ();
26971 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
26972 per-core tuning structs. */
26973 static int
26974 arm_issue_rate (void)
26976 return current_tune->issue_rate;
26979 /* Return how many instructions the scheduler should look ahead to choose the
26980 best one. */
26981 static int
26982 arm_first_cycle_multipass_dfa_lookahead (void)
26984 int issue_rate = arm_issue_rate ();
26986 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
26989 /* Enable modeling of L2 auto-prefetcher. */
26990 static int
26991 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
26993 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
26996 const char *
26997 arm_mangle_type (const_tree type)
26999 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27000 has to be mangled as if it is in the "std" namespace. */
27001 if (TARGET_AAPCS_BASED
27002 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27003 return "St9__va_list";
27005 /* Half-precision float. */
27006 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27007 return "Dh";
27009 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27010 builtin type. */
27011 if (TYPE_NAME (type) != NULL)
27012 return arm_mangle_builtin_type (type);
27014 /* Use the default mangling. */
27015 return NULL;
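/* Examples (from the cases above): on an AAPCS-based target __va_list
   mangles as "St9__va_list" and the half-precision __fp16 type mangles as
   "Dh"; anything else falls back to the Neon builtin mangling or to the
   default mangling.  */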
27018 /* Order of allocation of core registers for Thumb: this allocation is
27019 written over the corresponding initial entries of the array
27020 initialized with REG_ALLOC_ORDER. We allocate all low registers
27021 first. Saving and restoring a low register is usually cheaper than
27022 using a call-clobbered high register. */
27024 static const int thumb_core_reg_alloc_order[] =
27026 3, 2, 1, 0, 4, 5, 6, 7,
27027 14, 12, 8, 9, 10, 11
27030 /* Adjust register allocation order when compiling for Thumb. */
27032 void
27033 arm_order_regs_for_local_alloc (void)
27035 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27036 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27037 if (TARGET_THUMB)
27038 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27039 sizeof (thumb_core_reg_alloc_order));
27042 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27044 bool
27045 arm_frame_pointer_required (void)
27047 return (cfun->has_nonlocal_label
27048 || SUBTARGET_FRAME_POINTER_REQUIRED
27049 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27052 /* Only Thumb-1 lacks conditional execution, so return true if
27053 the target is not Thumb-1. */
27054 static bool
27055 arm_have_conditional_execution (void)
27057 return !TARGET_THUMB1;
27060 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27061 static HOST_WIDE_INT
27062 arm_vector_alignment (const_tree type)
27064 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27066 if (TARGET_AAPCS_BASED)
27067 align = MIN (align, 64);
27069 return align;
27072 static unsigned int
27073 arm_autovectorize_vector_sizes (void)
27075 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27078 static bool
27079 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27081 /* Vectors which aren't in packed structures will not be less aligned than
27082 the natural alignment of their element type, so this is safe. */
27083 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27084 return !is_packed;
27086 return default_builtin_vector_alignment_reachable (type, is_packed);
27089 static bool
27090 arm_builtin_support_vector_misalignment (machine_mode mode,
27091 const_tree type, int misalignment,
27092 bool is_packed)
27094 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27096 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27098 if (is_packed)
27099 return align == 1;
27101 /* If the misalignment is unknown, we should be able to handle the access
27102 so long as it is not to a member of a packed data structure. */
27103 if (misalignment == -1)
27104 return true;
27106 /* Return true if the misalignment is a multiple of the natural alignment
27107 of the vector's element type. This is probably always going to be
27108 true in practice, since we've already established that this isn't a
27109 packed access. */
27110 return ((misalignment % align) == 0);
27113 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27114 is_packed);
27117 static void
27118 arm_conditional_register_usage (void)
27120 int regno;
27122 if (TARGET_THUMB1 && optimize_size)
27124 /* When optimizing for size on Thumb-1, it's better not
27125 to use the HI regs, because of the overhead of
27126 stacking them. */
27127 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27128 fixed_regs[regno] = call_used_regs[regno] = 1;
27131 /* The link register can be clobbered by any branch insn,
27132 but we have no way to track that at present, so mark
27133 it as unavailable. */
27134 if (TARGET_THUMB1)
27135 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27137 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27139 /* VFPv3 registers are disabled when earlier VFP
27140 versions are selected due to the definition of
27141 LAST_VFP_REGNUM. */
27142 for (regno = FIRST_VFP_REGNUM;
27143 regno <= LAST_VFP_REGNUM; ++ regno)
27145 fixed_regs[regno] = 0;
27146 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27147 || regno >= FIRST_VFP_REGNUM + 32;
27151 if (TARGET_REALLY_IWMMXT)
27153 regno = FIRST_IWMMXT_GR_REGNUM;
27154 /* The 2002/10/09 revision of the XScale ABI has wCG0
27155 and wCG1 as call-preserved registers. The 2002/11/21
27156 revision changed this so that all wCG registers are
27157 scratch registers. */
27158 for (regno = FIRST_IWMMXT_GR_REGNUM;
27159 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27160 fixed_regs[regno] = 0;
27161 /* The XScale ABI has wR0 - wR9 as scratch registers,
27162 the rest as call-preserved registers. */
27163 for (regno = FIRST_IWMMXT_REGNUM;
27164 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27166 fixed_regs[regno] = 0;
27167 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27171 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27173 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27174 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27176 else if (TARGET_APCS_STACK)
27178 fixed_regs[10] = 1;
27179 call_used_regs[10] = 1;
27181 /* -mcaller-super-interworking reserves r11 for calls to
27182 _interwork_r11_call_via_rN(). Making the register global
27183 is an easy way of ensuring that it remains valid for all
27184 calls. */
27185 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27186 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27188 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27189 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27190 if (TARGET_CALLER_INTERWORKING)
27191 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27193 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27196 static reg_class_t
27197 arm_preferred_rename_class (reg_class_t rclass)
27199 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27200 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27201 and code size can be reduced. */
27202 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27203 return LO_REGS;
27204 else
27205 return NO_REGS;
27208 /* Compute the attribute "length" of insn "*push_multi".
27209 So this function MUST be kept in sync with that insn pattern. */
27211 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27213 int i, regno, hi_reg;
27214 int num_saves = XVECLEN (parallel_op, 0);
27216 /* ARM mode. */
27217 if (TARGET_ARM)
27218 return 4;
27219 /* Thumb1 mode. */
27220 if (TARGET_THUMB1)
27221 return 2;
27223 /* Thumb2 mode. */
27224 regno = REGNO (first_op);
27225 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27226 for (i = 1; i < num_saves && !hi_reg; i++)
27228 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27229 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27232 if (!hi_reg)
27233 return 2;
27234 return 4;
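/* Illustrative lengths (hypothetical register sets): in Thumb-2 a push of
   only low registers and/or LR, e.g. {r4, r5, lr}, is encodable in 2 bytes,
   whereas including a high register such as r8 forces the 4-byte encoding;
   ARM mode is always 4 bytes and Thumb-1 always 2.  */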
27237 /* Compute the number of instructions emitted by output_move_double. */
27239 arm_count_output_move_double_insns (rtx *operands)
27241 int count;
27242 rtx ops[2];
27243 /* output_move_double may modify the operands array, so call it
27244 here on a copy of the array. */
27245 ops[0] = operands[0];
27246 ops[1] = operands[1];
27247 output_move_double (ops, false, &count);
27248 return count;
27252 vfp3_const_double_for_fract_bits (rtx operand)
27254 REAL_VALUE_TYPE r0;
27256 if (!CONST_DOUBLE_P (operand))
27257 return 0;
27259 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27260 if (exact_real_inverse (DFmode, &r0))
27262 if (exact_real_truncate (DFmode, &r0))
27264 HOST_WIDE_INT value = real_to_integer (&r0);
27265 value = value & 0xffffffff;
27266 if ((value != 0) && ( (value & (value - 1)) == 0))
27267 return int_log2 (value);
27270 return 0;
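/* Worked example (illustrative): for the constant 0.25 the exact inverse is
   4.0, which converts exactly to the integer 4 = 2^2, so the function
   returns 2 (the number of fractional bits); constants without an exact
   power-of-two inverse yield 0.  */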
27274 vfp3_const_double_for_bits (rtx operand)
27276 REAL_VALUE_TYPE r0;
27278 if (!CONST_DOUBLE_P (operand))
27279 return 0;
27281 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27282 if (exact_real_truncate (DFmode, &r0))
27284 HOST_WIDE_INT value = real_to_integer (&r0);
27285 value = value & 0xffffffff;
27286 if ((value != 0) && ( (value & (value - 1)) == 0))
27287 return int_log2 (value);
27290 return 0;
27293 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27295 static void
27296 arm_pre_atomic_barrier (enum memmodel model)
27298 if (need_atomic_barrier_p (model, true))
27299 emit_insn (gen_memory_barrier ());
27302 static void
27303 arm_post_atomic_barrier (enum memmodel model)
27305 if (need_atomic_barrier_p (model, false))
27306 emit_insn (gen_memory_barrier ());
27309 /* Emit the load-exclusive and store-exclusive instructions.
27310 Use acquire and release versions if necessary. */
27312 static void
27313 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27315 rtx (*gen) (rtx, rtx);
27317 if (acq)
27319 switch (mode)
27321 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27322 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27323 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27324 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27325 default:
27326 gcc_unreachable ();
27329 else
27331 switch (mode)
27333 case QImode: gen = gen_arm_load_exclusiveqi; break;
27334 case HImode: gen = gen_arm_load_exclusivehi; break;
27335 case SImode: gen = gen_arm_load_exclusivesi; break;
27336 case DImode: gen = gen_arm_load_exclusivedi; break;
27337 default:
27338 gcc_unreachable ();
27342 emit_insn (gen (rval, mem));
27345 static void
27346 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27347 rtx mem, bool rel)
27349 rtx (*gen) (rtx, rtx, rtx);
27351 if (rel)
27353 switch (mode)
27355 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27356 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27357 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27358 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27359 default:
27360 gcc_unreachable ();
27363 else
27365 switch (mode)
27367 case QImode: gen = gen_arm_store_exclusiveqi; break;
27368 case HImode: gen = gen_arm_store_exclusivehi; break;
27369 case SImode: gen = gen_arm_store_exclusivesi; break;
27370 case DImode: gen = gen_arm_store_exclusivedi; break;
27371 default:
27372 gcc_unreachable ();
27376 emit_insn (gen (bval, rval, mem));
27379 /* Mark the previous jump instruction as unlikely. */
27381 static void
27382 emit_unlikely_jump (rtx insn)
27384 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27386 insn = emit_jump_insn (insn);
27387 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
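/* Worked example (illustrative): REG_BR_PROB_BASE is 10000, so the note
   value computed above is 10000 / 100 - 1 == 99, i.e. the jump is annotated
   as taken roughly 1% of the time.  */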
27390 /* Expand a compare and swap pattern. */
27392 void
27393 arm_expand_compare_and_swap (rtx operands[])
27395 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27396 machine_mode mode;
27397 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27399 bval = operands[0];
27400 rval = operands[1];
27401 mem = operands[2];
27402 oldval = operands[3];
27403 newval = operands[4];
27404 is_weak = operands[5];
27405 mod_s = operands[6];
27406 mod_f = operands[7];
27407 mode = GET_MODE (mem);
27409 /* Normally the succ memory model must be stronger than fail, but in the
27410 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27411 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27413 if (TARGET_HAVE_LDACQ
27414 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27415 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27416 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27418 switch (mode)
27420 case QImode:
27421 case HImode:
27422 /* For narrow modes, we're going to perform the comparison in SImode,
27423 so do the zero-extension now. */
27424 rval = gen_reg_rtx (SImode);
27425 oldval = convert_modes (SImode, mode, oldval, true);
27426 /* FALLTHRU */
27428 case SImode:
27429 /* Force the value into a register if needed. We waited until after
27430 the zero-extension above to do this properly. */
27431 if (!arm_add_operand (oldval, SImode))
27432 oldval = force_reg (SImode, oldval);
27433 break;
27435 case DImode:
27436 if (!cmpdi_operand (oldval, mode))
27437 oldval = force_reg (mode, oldval);
27438 break;
27440 default:
27441 gcc_unreachable ();
27444 switch (mode)
27446 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27447 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27448 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27449 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27450 default:
27451 gcc_unreachable ();
27454 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27456 if (mode == QImode || mode == HImode)
27457 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27459 /* In all cases, we arrange for success to be signaled by Z set.
27460 This arrangement allows for the boolean result to be used directly
27461 in a subsequent branch, post optimization. */
27462 x = gen_rtx_REG (CCmode, CC_REGNUM);
27463 x = gen_rtx_EQ (SImode, x, const0_rtx);
27464 emit_insn (gen_rtx_SET (bval, x));
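/* Illustrative sketch (not part of arm.c): source of the following shape is
   what reaches the expander above; operands[5] is the weak/strong flag and
   operands[6]/operands[7] carry the success and failure memory models.  */
#include <stdbool.h>

static bool
cas_int (int *p, int *expected, int desired)
{
  /* Strong compare-and-swap with seq_cst success/failure orderings.  */
  return __atomic_compare_exchange_n (p, expected, desired,
                                      false /* strong */,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}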
27467 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27468 another memory store between the load-exclusive and store-exclusive can
27469 reset the monitor from Exclusive to Open state. This means we must wait
27470 until after reload to split the pattern, lest we get a register spill in
27471 the middle of the atomic sequence. */
27473 void
27474 arm_split_compare_and_swap (rtx operands[])
27476 rtx rval, mem, oldval, newval, scratch;
27477 machine_mode mode;
27478 enum memmodel mod_s, mod_f;
27479 bool is_weak;
27480 rtx_code_label *label1, *label2;
27481 rtx x, cond;
27483 rval = operands[0];
27484 mem = operands[1];
27485 oldval = operands[2];
27486 newval = operands[3];
27487 is_weak = (operands[4] != const0_rtx);
27488 mod_s = (enum memmodel) INTVAL (operands[5]);
27489 mod_f = (enum memmodel) INTVAL (operands[6]);
27490 scratch = operands[7];
27491 mode = GET_MODE (mem);
27493 bool use_acquire = TARGET_HAVE_LDACQ
27494 && !(mod_s == MEMMODEL_RELAXED
27495 || mod_s == MEMMODEL_CONSUME
27496 || mod_s == MEMMODEL_RELEASE);
27498 bool use_release = TARGET_HAVE_LDACQ
27499 && !(mod_s == MEMMODEL_RELAXED
27500 || mod_s == MEMMODEL_CONSUME
27501 || mod_s == MEMMODEL_ACQUIRE);
27503 /* Checks whether a barrier is needed and emits one accordingly. */
27504 if (!(use_acquire || use_release))
27505 arm_pre_atomic_barrier (mod_s);
27507 label1 = NULL;
27508 if (!is_weak)
27510 label1 = gen_label_rtx ();
27511 emit_label (label1);
27513 label2 = gen_label_rtx ();
27515 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27517 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27518 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27519 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27520 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27521 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27523 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27525 /* Weak or strong, we want EQ to be true for success, so that we
27526 match the flags that we got from the compare above. */
27527 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27528 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27529 emit_insn (gen_rtx_SET (cond, x));
27531 if (!is_weak)
27533 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27534 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27535 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27536 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27539 if (mod_f != MEMMODEL_RELAXED)
27540 emit_label (label2);
27542 /* Checks whether a barrier is needed and emits one accordingly. */
27543 if (!(use_acquire || use_release))
27544 arm_post_atomic_barrier (mod_s);
27546 if (mod_f == MEMMODEL_RELAXED)
27547 emit_label (label2);
27550 void
27551 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27552 rtx value, rtx model_rtx, rtx cond)
27554 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27555 machine_mode mode = GET_MODE (mem);
27556 machine_mode wmode = (mode == DImode ? DImode : SImode);
27557 rtx_code_label *label;
27558 rtx x;
27560 bool use_acquire = TARGET_HAVE_LDACQ
27561 && !(model == MEMMODEL_RELAXED
27562 || model == MEMMODEL_CONSUME
27563 || model == MEMMODEL_RELEASE);
27565 bool use_release = TARGET_HAVE_LDACQ
27566 && !(model == MEMMODEL_RELAXED
27567 || model == MEMMODEL_CONSUME
27568 || model == MEMMODEL_ACQUIRE);
27570 /* Checks whether a barrier is needed and emits one accordingly. */
27571 if (!(use_acquire || use_release))
27572 arm_pre_atomic_barrier (model);
27574 label = gen_label_rtx ();
27575 emit_label (label);
27577 if (new_out)
27578 new_out = gen_lowpart (wmode, new_out);
27579 if (old_out)
27580 old_out = gen_lowpart (wmode, old_out);
27581 else
27582 old_out = new_out;
27583 value = simplify_gen_subreg (wmode, value, mode, 0);
27585 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27587 switch (code)
27589 case SET:
27590 new_out = value;
27591 break;
27593 case NOT:
27594 x = gen_rtx_AND (wmode, old_out, value);
27595 emit_insn (gen_rtx_SET (new_out, x));
27596 x = gen_rtx_NOT (wmode, new_out);
27597 emit_insn (gen_rtx_SET (new_out, x));
27598 break;
27600 case MINUS:
27601 if (CONST_INT_P (value))
27603 value = GEN_INT (-INTVAL (value));
27604 code = PLUS;
27606 /* FALLTHRU */
27608 case PLUS:
27609 if (mode == DImode)
27611 /* DImode plus/minus need to clobber flags. */
27612 /* The adddi3 and subdi3 patterns are incorrectly written so that
27613 they require matching operands, even when we could easily support
27614 three operands. Thankfully, this can be fixed up post-splitting,
27615 as the individual add+adc patterns do accept three operands and
27616 post-reload cprop can make these moves go away. */
27617 emit_move_insn (new_out, old_out);
27618 if (code == PLUS)
27619 x = gen_adddi3 (new_out, new_out, value);
27620 else
27621 x = gen_subdi3 (new_out, new_out, value);
27622 emit_insn (x);
27623 break;
27625 /* FALLTHRU */
27627 default:
27628 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27629 emit_insn (gen_rtx_SET (new_out, x));
27630 break;
27633 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27634 use_release);
27636 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27637 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27639 /* Checks whether a barrier is needed and emits one accordingly. */
27640 if (!(use_acquire || use_release))
27641 arm_post_atomic_barrier (model);
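/* Illustrative sketch (not part of arm.c): a read-modify-write builtin such
   as this is split by arm_split_atomic_op above (CODE == PLUS) into a
   load-exclusive / add / store-exclusive retry loop, with barriers or
   acquire/release forms chosen from the memory model.  */
static int
fetch_add_relaxed (int *p, int v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
}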
27644 #define MAX_VECT_LEN 16
27646 struct expand_vec_perm_d
27648 rtx target, op0, op1;
27649 unsigned char perm[MAX_VECT_LEN];
27650 machine_mode vmode;
27651 unsigned char nelt;
27652 bool one_vector_p;
27653 bool testing_p;
27656 /* Generate a variable permutation. */
27658 static void
27659 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27661 machine_mode vmode = GET_MODE (target);
27662 bool one_vector_p = rtx_equal_p (op0, op1);
27664 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27665 gcc_checking_assert (GET_MODE (op0) == vmode);
27666 gcc_checking_assert (GET_MODE (op1) == vmode);
27667 gcc_checking_assert (GET_MODE (sel) == vmode);
27668 gcc_checking_assert (TARGET_NEON);
27670 if (one_vector_p)
27672 if (vmode == V8QImode)
27673 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27674 else
27675 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27677 else
27679 rtx pair;
27681 if (vmode == V8QImode)
27683 pair = gen_reg_rtx (V16QImode);
27684 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27685 pair = gen_lowpart (TImode, pair);
27686 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27688 else
27690 pair = gen_reg_rtx (OImode);
27691 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27692 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27697 void
27698 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27700 machine_mode vmode = GET_MODE (target);
27701 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27702 bool one_vector_p = rtx_equal_p (op0, op1);
27703 rtx rmask[MAX_VECT_LEN], mask;
27705 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27706 numbering of elements for big-endian, we must reverse the order. */
27707 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27709 /* The VTBL instruction does not use a modulo index, so we must take care
27710 of that ourselves. */
27711 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27712 for (i = 0; i < nelt; ++i)
27713 rmask[i] = mask;
27714 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27715 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27717 arm_expand_vec_perm_1 (target, op0, op1, sel);
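/* Illustrative sketch (not part of arm.c): on a NEON-enabled build, a
   shuffle whose selector is not a compile-time constant goes through
   arm_expand_vec_perm above -- the indices are ANDed with the mask built
   there and a VTBL1 (one input) or VTBL2 (two inputs) is emitted.  */
typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

static u8x8
permute_variable (u8x8 x, u8x8 sel)
{
  return __builtin_shuffle (x, sel);   /* selector only known at run time */
}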
27720 /* Generate or test for an insn that supports a constant permutation. */
27722 /* Recognize patterns for the VUZP insns. */
27724 static bool
27725 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27727 unsigned int i, odd, mask, nelt = d->nelt;
27728 rtx out0, out1, in0, in1, x;
27729 rtx (*gen)(rtx, rtx, rtx, rtx);
27731 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27732 return false;
27734 /* Note that these are little-endian tests. Adjust for big-endian later. */
27735 if (d->perm[0] == 0)
27736 odd = 0;
27737 else if (d->perm[0] == 1)
27738 odd = 1;
27739 else
27740 return false;
27741 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27743 for (i = 0; i < nelt; i++)
27745 unsigned elt = (i * 2 + odd) & mask;
27746 if (d->perm[i] != elt)
27747 return false;
27750 /* Success! */
27751 if (d->testing_p)
27752 return true;
27754 switch (d->vmode)
27756 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27757 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27758 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27759 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27760 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27761 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27762 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27763 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27764 default:
27765 gcc_unreachable ();
27768 in0 = d->op0;
27769 in1 = d->op1;
27770 if (BYTES_BIG_ENDIAN)
27772 x = in0, in0 = in1, in1 = x;
27773 odd = !odd;
27776 out0 = d->target;
27777 out1 = gen_reg_rtx (d->vmode);
27778 if (odd)
27779 x = out0, out0 = out1, out1 = x;
27781 emit_insn (gen (out0, in0, in1, out1));
27782 return true;
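/* Illustrative sketch (not part of arm.c): for V8QImode (nelt == 8) the
   constant selector below has perm[0] == 0 and perm[i] == 2*i, i.e. the
   even elements of the concatenation {a, b}, so the recognizer above
   succeeds with odd == 0 and a single VUZP is emitted.  The VZIP, VREV and
   VTRN recognizers that follow check for their own characteristic index
   patterns in the same way.  */
typedef unsigned char u8x8_t __attribute__ ((vector_size (8)));

static u8x8_t
even_lanes (u8x8_t a, u8x8_t b)
{
  const u8x8_t sel = { 0, 2, 4, 6, 8, 10, 12, 14 };
  return __builtin_shuffle (a, b, sel);   /* constant two-operand shuffle */
}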
27785 /* Recognize patterns for the VZIP insns. */
27787 static bool
27788 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27790 unsigned int i, high, mask, nelt = d->nelt;
27791 rtx out0, out1, in0, in1, x;
27792 rtx (*gen)(rtx, rtx, rtx, rtx);
27794 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27795 return false;
27797 /* Note that these are little-endian tests. Adjust for big-endian later. */
27798 high = nelt / 2;
27799 if (d->perm[0] == high)
27801 else if (d->perm[0] == 0)
27802 high = 0;
27803 else
27804 return false;
27805 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27807 for (i = 0; i < nelt / 2; i++)
27809 unsigned elt = (i + high) & mask;
27810 if (d->perm[i * 2] != elt)
27811 return false;
27812 elt = (elt + nelt) & mask;
27813 if (d->perm[i * 2 + 1] != elt)
27814 return false;
27817 /* Success! */
27818 if (d->testing_p)
27819 return true;
27821 switch (d->vmode)
27823 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27824 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27825 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27826 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27827 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27828 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27829 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27830 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27831 default:
27832 gcc_unreachable ();
27835 in0 = d->op0;
27836 in1 = d->op1;
27837 if (BYTES_BIG_ENDIAN)
27839 x = in0, in0 = in1, in1 = x;
27840 high = !high;
27843 out0 = d->target;
27844 out1 = gen_reg_rtx (d->vmode);
27845 if (high)
27846 x = out0, out0 = out1, out1 = x;
27848 emit_insn (gen (out0, in0, in1, out1));
27849 return true;
27852 /* Recognize patterns for the VREV insns. */
27854 static bool
27855 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27857 unsigned int i, j, diff, nelt = d->nelt;
27858 rtx (*gen)(rtx, rtx);
27860 if (!d->one_vector_p)
27861 return false;
27863 diff = d->perm[0];
27864 switch (diff)
27866 case 7:
27867 switch (d->vmode)
27869 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27870 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27871 default:
27872 return false;
27874 break;
27875 case 3:
27876 switch (d->vmode)
27878 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27879 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27880 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27881 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27882 default:
27883 return false;
27885 break;
27886 case 1:
27887 switch (d->vmode)
27889 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27890 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27891 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27892 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27893 case V4SImode: gen = gen_neon_vrev64v4si; break;
27894 case V2SImode: gen = gen_neon_vrev64v2si; break;
27895 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27896 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27897 default:
27898 return false;
27900 break;
27901 default:
27902 return false;
27905 for (i = 0; i < nelt ; i += diff + 1)
27906 for (j = 0; j <= diff; j += 1)
27908 /* This is guaranteed to be true, as the value of diff
27909 is 7, 3 or 1 and we should have enough elements in the
27910 queue to generate this. Getting a vector mask with a
27911 value of diff other than these values implies that
27912 something is wrong by the time we get here. */
27913 gcc_assert (i + j < nelt);
27914 if (d->perm[i + j] != i + diff - j)
27915 return false;
27918 /* Success! */
27919 if (d->testing_p)
27920 return true;
27922 emit_insn (gen (d->target, d->op0));
27923 return true;
27926 /* Recognize patterns for the VTRN insns. */
27928 static bool
27929 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27931 unsigned int i, odd, mask, nelt = d->nelt;
27932 rtx out0, out1, in0, in1, x;
27933 rtx (*gen)(rtx, rtx, rtx, rtx);
27935 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27936 return false;
27938 /* Note that these are little-endian tests. Adjust for big-endian later. */
27939 if (d->perm[0] == 0)
27940 odd = 0;
27941 else if (d->perm[0] == 1)
27942 odd = 1;
27943 else
27944 return false;
27945 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27947 for (i = 0; i < nelt; i += 2)
27949 if (d->perm[i] != i + odd)
27950 return false;
27951 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27952 return false;
27955 /* Success! */
27956 if (d->testing_p)
27957 return true;
27959 switch (d->vmode)
27961 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27962 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27963 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
27964 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
27965 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
27966 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
27967 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
27968 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
27969 default:
27970 gcc_unreachable ();
27973 in0 = d->op0;
27974 in1 = d->op1;
27975 if (BYTES_BIG_ENDIAN)
27977 x = in0, in0 = in1, in1 = x;
27978 odd = !odd;
27981 out0 = d->target;
27982 out1 = gen_reg_rtx (d->vmode);
27983 if (odd)
27984 x = out0, out0 = out1, out1 = x;
27986 emit_insn (gen (out0, in0, in1, out1));
27987 return true;
27990 /* Recognize patterns for the VEXT insns. */
27992 static bool
27993 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
27995 unsigned int i, nelt = d->nelt;
27996 rtx (*gen) (rtx, rtx, rtx, rtx);
27997 rtx offset;
27999 unsigned int location;
28001 unsigned int next = d->perm[0] + 1;
28003 /* TODO: Handle GCC's numbering of elements for big-endian. */
28004 if (BYTES_BIG_ENDIAN)
28005 return false;
28007 /* Check if the extracted indexes are increasing by one. */
28008 for (i = 1; i < nelt; next++, i++)
28010 /* If we hit the most significant element of the 2nd vector in
28011 the previous iteration, no need to test further. */
28012 if (next == 2 * nelt)
28013 return false;
28015 /* If we are operating on only one vector: it could be a
28016 rotation. If there are only two elements of size < 64, let
28017 arm_evpc_neon_vrev catch it. */
28018 if (d->one_vector_p && (next == nelt))
28020 if ((nelt == 2) && (d->vmode != V2DImode))
28021 return false;
28022 else
28023 next = 0;
28026 if (d->perm[i] != next)
28027 return false;
28030 location = d->perm[0];
28032 switch (d->vmode)
28034 case V16QImode: gen = gen_neon_vextv16qi; break;
28035 case V8QImode: gen = gen_neon_vextv8qi; break;
28036 case V4HImode: gen = gen_neon_vextv4hi; break;
28037 case V8HImode: gen = gen_neon_vextv8hi; break;
28038 case V2SImode: gen = gen_neon_vextv2si; break;
28039 case V4SImode: gen = gen_neon_vextv4si; break;
28040 case V2SFmode: gen = gen_neon_vextv2sf; break;
28041 case V4SFmode: gen = gen_neon_vextv4sf; break;
28042 case V2DImode: gen = gen_neon_vextv2di; break;
28043 default:
28044 return false;
28047 /* Success! */
28048 if (d->testing_p)
28049 return true;
28051 offset = GEN_INT (location);
28052 emit_insn (gen (d->target, d->op0, d->op1, offset));
28053 return true;
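/* Illustrative sketch (not part of arm.c): a selector that takes a run of
   consecutive elements straddling the two inputs, here starting at index 3,
   passes the "increasing by one" check above and is emitted as a single
   VEXT with offset 3.  */
typedef unsigned char u8x8_v __attribute__ ((vector_size (8)));

static u8x8_v
extract_window (u8x8_v a, u8x8_v b)
{
  const u8x8_v sel = { 3, 4, 5, 6, 7, 8, 9, 10 };
  return __builtin_shuffle (a, b, sel);
}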
28056 /* The NEON VTBL instruction is a fully variable permutation that's even
28057 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28058 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28059 can do slightly better by expanding this as a constant where we don't
28060 have to apply a mask. */
28062 static bool
28063 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28065 rtx rperm[MAX_VECT_LEN], sel;
28066 machine_mode vmode = d->vmode;
28067 unsigned int i, nelt = d->nelt;
28069 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28070 numbering of elements for big-endian, we must reverse the order. */
28071 if (BYTES_BIG_ENDIAN)
28072 return false;
28074 if (d->testing_p)
28075 return true;
28077 /* Generic code will try constant permutation twice. Once with the
28078 original mode and again with the elements lowered to QImode.
28079 So wait and don't do the selector expansion ourselves. */
28080 if (vmode != V8QImode && vmode != V16QImode)
28081 return false;
28083 for (i = 0; i < nelt; ++i)
28084 rperm[i] = GEN_INT (d->perm[i]);
28085 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28086 sel = force_reg (vmode, sel);
28088 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28089 return true;
28092 static bool
28093 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28095 /* Check if the input mask matches vext before reordering the
28096 operands. */
28097 if (TARGET_NEON)
28098 if (arm_evpc_neon_vext (d))
28099 return true;
28101 /* The pattern matching functions above are written to look for a small
28102 number to begin the sequence (0, 1, N/2). If we begin with an index
28103 from the second operand, we can swap the operands. */
28104 if (d->perm[0] >= d->nelt)
28106 unsigned i, nelt = d->nelt;
28107 rtx x;
28109 for (i = 0; i < nelt; ++i)
28110 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28112 x = d->op0;
28113 d->op0 = d->op1;
28114 d->op1 = x;
28117 if (TARGET_NEON)
28119 if (arm_evpc_neon_vuzp (d))
28120 return true;
28121 if (arm_evpc_neon_vzip (d))
28122 return true;
28123 if (arm_evpc_neon_vrev (d))
28124 return true;
28125 if (arm_evpc_neon_vtrn (d))
28126 return true;
28127 return arm_evpc_neon_vtbl (d);
28129 return false;
28132 /* Expand a vec_perm_const pattern. */
28134 bool
28135 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28137 struct expand_vec_perm_d d;
28138 int i, nelt, which;
28140 d.target = target;
28141 d.op0 = op0;
28142 d.op1 = op1;
28144 d.vmode = GET_MODE (target);
28145 gcc_assert (VECTOR_MODE_P (d.vmode));
28146 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28147 d.testing_p = false;
28149 for (i = which = 0; i < nelt; ++i)
28151 rtx e = XVECEXP (sel, 0, i);
28152 int ei = INTVAL (e) & (2 * nelt - 1);
28153 which |= (ei < nelt ? 1 : 2);
28154 d.perm[i] = ei;
28157 switch (which)
28159 default:
28160 gcc_unreachable();
28162 case 3:
28163 d.one_vector_p = false;
28164 if (!rtx_equal_p (op0, op1))
28165 break;
28167 /* The elements of PERM do not suggest that only the first operand
28168 is used, but both operands are identical. Allow easier matching
28169 of the permutation by folding the permutation into the single
28170 input vector. */
28171 /* FALLTHRU */
28172 case 2:
28173 for (i = 0; i < nelt; ++i)
28174 d.perm[i] &= nelt - 1;
28175 d.op0 = op1;
28176 d.one_vector_p = true;
28177 break;
28179 case 1:
28180 d.op1 = op0;
28181 d.one_vector_p = true;
28182 break;
28185 return arm_expand_vec_perm_const_1 (&d);
28188 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28190 static bool
28191 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28192 const unsigned char *sel)
28194 struct expand_vec_perm_d d;
28195 unsigned int i, nelt, which;
28196 bool ret;
28198 d.vmode = vmode;
28199 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28200 d.testing_p = true;
28201 memcpy (d.perm, sel, nelt);
28203 /* Categorize the set of elements in the selector. */
28204 for (i = which = 0; i < nelt; ++i)
28206 unsigned char e = d.perm[i];
28207 gcc_assert (e < 2 * nelt);
28208 which |= (e < nelt ? 1 : 2);
28211 /* For all elements from second vector, fold the elements to first. */
28212 if (which == 2)
28213 for (i = 0; i < nelt; ++i)
28214 d.perm[i] -= nelt;
28216 /* Check whether the mask can be applied to the vector type. */
28217 d.one_vector_p = (which != 3);
28219 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28220 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28221 if (!d.one_vector_p)
28222 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28224 start_sequence ();
28225 ret = arm_expand_vec_perm_const_1 (&d);
28226 end_sequence ();
28228 return ret;
28231 bool
28232 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28234 /* If we are soft float then, provided we have ldrd or the mode
28235 fits in a word, all auto increment forms are ok. */
28236 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28237 return true;
28239 switch (code)
28241 /* Post increment and Pre Decrement are supported for all
28242 instruction forms except for vector forms. */
28243 case ARM_POST_INC:
28244 case ARM_PRE_DEC:
28245 if (VECTOR_MODE_P (mode))
28247 if (code != ARM_PRE_DEC)
28248 return true;
28249 else
28250 return false;
28253 return true;
28255 case ARM_POST_DEC:
28256 case ARM_PRE_INC:
28257 /* Without LDRD, when the mode size is greater than
28258 word size there is no point in auto-incrementing
28259 because ldm and stm will not have these forms. */
28260 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28261 return false;
28263 /* Vector and floating point modes do not support
28264 these auto increment forms. */
28265 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28266 return false;
28268 return true;
28270 default:
28271 return false;
28275 return false;
28278 /* The default expansion of general 64-bit shifts in core-regs is
28279 suboptimal on ARM, since we know that shifts by negative amounts are no-ops.
28280 Additionally, the default expansion code is not available or suitable
28281 for post-reload insn splits (this can occur when the register allocator
28282 chooses not to do a shift in NEON).
28284 This function is used in both initial expand and post-reload splits, and
28285 handles all kinds of 64-bit shifts.
28287 Input requirements:
28288 - It is safe for the input and output to be the same register, but
28289 early-clobber rules apply for the shift amount and scratch registers.
28290 - Shift by register requires both scratch registers. In all other cases
28291 the scratch registers may be NULL.
28292 - Ashiftrt by a register also clobbers the CC register. */
28293 void
28294 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28295 rtx amount, rtx scratch1, rtx scratch2)
28297 rtx out_high = gen_highpart (SImode, out);
28298 rtx out_low = gen_lowpart (SImode, out);
28299 rtx in_high = gen_highpart (SImode, in);
28300 rtx in_low = gen_lowpart (SImode, in);
28302 /* Terminology:
28303 in = the register pair containing the input value.
28304 out = the destination register pair.
28305 up = the high- or low-part of each pair.
28306 down = the opposite part to "up".
28307 In a shift, we can consider bits to shift from "up"-stream to
28308 "down"-stream, so in a left-shift "up" is the low-part and "down"
28309 is the high-part of each register pair. */
28311 rtx out_up = code == ASHIFT ? out_low : out_high;
28312 rtx out_down = code == ASHIFT ? out_high : out_low;
28313 rtx in_up = code == ASHIFT ? in_low : in_high;
28314 rtx in_down = code == ASHIFT ? in_high : in_low;
28316 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28317 gcc_assert (out
28318 && (REG_P (out) || GET_CODE (out) == SUBREG)
28319 && GET_MODE (out) == DImode);
28320 gcc_assert (in
28321 && (REG_P (in) || GET_CODE (in) == SUBREG)
28322 && GET_MODE (in) == DImode);
28323 gcc_assert (amount
28324 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28325 && GET_MODE (amount) == SImode)
28326 || CONST_INT_P (amount)));
28327 gcc_assert (scratch1 == NULL
28328 || (GET_CODE (scratch1) == SCRATCH)
28329 || (GET_MODE (scratch1) == SImode
28330 && REG_P (scratch1)));
28331 gcc_assert (scratch2 == NULL
28332 || (GET_CODE (scratch2) == SCRATCH)
28333 || (GET_MODE (scratch2) == SImode
28334 && REG_P (scratch2)));
28335 gcc_assert (!REG_P (out) || !REG_P (amount)
28336 || !HARD_REGISTER_P (out)
28337 || (REGNO (out) != REGNO (amount)
28338 && REGNO (out) + 1 != REGNO (amount)));
28340 /* Macros to make following code more readable. */
28341 #define SUB_32(DEST,SRC) \
28342 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28343 #define RSB_32(DEST,SRC) \
28344 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28345 #define SUB_S_32(DEST,SRC) \
28346 gen_addsi3_compare0 ((DEST), (SRC), \
28347 GEN_INT (-32))
28348 #define SET(DEST,SRC) \
28349 gen_rtx_SET ((DEST), (SRC))
28350 #define SHIFT(CODE,SRC,AMOUNT) \
28351 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28352 #define LSHIFT(CODE,SRC,AMOUNT) \
28353 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28354 SImode, (SRC), (AMOUNT))
28355 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28356 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28357 SImode, (SRC), (AMOUNT))
28358 #define ORR(A,B) \
28359 gen_rtx_IOR (SImode, (A), (B))
28360 #define BRANCH(COND,LABEL) \
28361 gen_arm_cond_branch ((LABEL), \
28362 gen_rtx_ ## COND (CCmode, cc_reg, \
28363 const0_rtx), \
28364 cc_reg)
28366 /* Shifts by register and shifts by constant are handled separately. */
28367 if (CONST_INT_P (amount))
28369 /* We have a shift-by-constant. */
28371 /* First, handle out-of-range shift amounts.
28372 In both cases we try to match the result an ARM instruction in a
28373 shift-by-register would give. This helps reduce execution
28374 differences between optimization levels, but it won't stop other
28375 parts of the compiler doing different things. This is "undefined
28376 behaviour", in any case. */
28377 if (INTVAL (amount) <= 0)
28378 emit_insn (gen_movdi (out, in));
28379 else if (INTVAL (amount) >= 64)
28381 if (code == ASHIFTRT)
28383 rtx const31_rtx = GEN_INT (31);
28384 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28385 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28387 else
28388 emit_insn (gen_movdi (out, const0_rtx));
28391 /* Now handle valid shifts. */
28392 else if (INTVAL (amount) < 32)
28394 /* Shifts by a constant less than 32. */
28395 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28397 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28398 emit_insn (SET (out_down,
28399 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28400 out_down)));
28401 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28403 else
28405 /* Shifts by a constant greater than 31. */
28406 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28408 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28409 if (code == ASHIFTRT)
28410 emit_insn (gen_ashrsi3 (out_up, in_up,
28411 GEN_INT (31)));
28412 else
28413 emit_insn (SET (out_up, const0_rtx));
28416 else
28418 /* We have a shift-by-register. */
28419 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28421 /* This alternative requires the scratch registers. */
28422 gcc_assert (scratch1 && REG_P (scratch1));
28423 gcc_assert (scratch2 && REG_P (scratch2));
28425 /* We will need the values "amount-32" and "32-amount" later.
28426 Swapping them around now allows the later code to be more general. */
28427 switch (code)
28429 case ASHIFT:
28430 emit_insn (SUB_32 (scratch1, amount));
28431 emit_insn (RSB_32 (scratch2, amount));
28432 break;
28433 case ASHIFTRT:
28434 emit_insn (RSB_32 (scratch1, amount));
28435 /* Also set CC = amount > 32. */
28436 emit_insn (SUB_S_32 (scratch2, amount));
28437 break;
28438 case LSHIFTRT:
28439 emit_insn (RSB_32 (scratch1, amount));
28440 emit_insn (SUB_32 (scratch2, amount));
28441 break;
28442 default:
28443 gcc_unreachable ();
28446 /* Emit code like this:
28448 arithmetic-left:
28449 out_down = in_down << amount;
28450 out_down = (in_up << (amount - 32)) | out_down;
28451 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28452 out_up = in_up << amount;
28454 arithmetic-right:
28455 out_down = in_down >> amount;
28456 out_down = (in_up << (32 - amount)) | out_down;
28457 if (amount < 32)
28458 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28459 out_up = in_up << amount;
28461 logical-right:
28462 out_down = in_down >> amount;
28463 out_down = (in_up << (32 - amount)) | out_down;
28464 if (amount < 32)
28465 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28466 out_up = in_up << amount;
28468 The ARM and Thumb2 variants are the same but implemented slightly
28469 differently. If this were only called during expand we could just
28470 use the Thumb2 case and let combine do the right thing, but this
28471 can also be called from post-reload splitters. */
28473 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28475 if (!TARGET_THUMB2)
28477 /* Emit code for ARM mode. */
28478 emit_insn (SET (out_down,
28479 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28480 if (code == ASHIFTRT)
28482 rtx_code_label *done_label = gen_label_rtx ();
28483 emit_jump_insn (BRANCH (LT, done_label));
28484 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28485 out_down)));
28486 emit_label (done_label);
28488 else
28489 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28490 out_down)));
28492 else
28494 /* Emit code for Thumb2 mode.
28495 Thumb2 can't do shift and or in one insn. */
28496 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28497 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28499 if (code == ASHIFTRT)
28501 rtx_code_label *done_label = gen_label_rtx ();
28502 emit_jump_insn (BRANCH (LT, done_label));
28503 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28504 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28505 emit_label (done_label);
28507 else
28509 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28510 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28514 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28517 #undef SUB_32
28518 #undef RSB_32
28519 #undef SUB_S_32
28520 #undef SET
28521 #undef SHIFT
28522 #undef LSHIFT
28523 #undef REV_LSHIFT
28524 #undef ORR
28525 #undef BRANCH
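/* Illustrative sketch (not part of arm.c): a 64-bit shift by a run-time
   amount, when the value lives in a core-register pair, is the case the
   helper above expands or splits; the constant and register paths emit the
   sequences described in the comments inside it.  */
static unsigned long long
shl64 (unsigned long long x, int n)
{
  return x << n;
}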
28529 /* Return true if this is a valid comparison operation, and force
28530 the operands into a form that is valid. */
28531 bool
28532 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28534 enum rtx_code code = GET_CODE (*comparison);
28535 int code_int;
28536 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28537 ? GET_MODE (*op2) : GET_MODE (*op1);
28539 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28541 if (code == UNEQ || code == LTGT)
28542 return false;
28544 code_int = (int)code;
28545 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28546 PUT_CODE (*comparison, (enum rtx_code)code_int);
28548 switch (mode)
28550 case SImode:
28551 if (!arm_add_operand (*op1, mode))
28552 *op1 = force_reg (mode, *op1);
28553 if (!arm_add_operand (*op2, mode))
28554 *op2 = force_reg (mode, *op2);
28555 return true;
28557 case DImode:
28558 if (!cmpdi_operand (*op1, mode))
28559 *op1 = force_reg (mode, *op1);
28560 if (!cmpdi_operand (*op2, mode))
28561 *op2 = force_reg (mode, *op2);
28562 return true;
28564 case SFmode:
28565 case DFmode:
28566 if (!arm_float_compare_operand (*op1, mode))
28567 *op1 = force_reg (mode, *op1);
28568 if (!arm_float_compare_operand (*op2, mode))
28569 *op2 = force_reg (mode, *op2);
28570 return true;
28571 default:
28572 break;
28575 return false;
28579 /* Maximum number of instructions to set block of memory. */
28580 static int
28581 arm_block_set_max_insns (void)
28583 if (optimize_function_for_size_p (cfun))
28584 return 4;
28585 else
28586 return current_tune->max_insns_inline_memset;
28589 /* Return TRUE if it's profitable to set a block of memory for the
28590 non-vectorized case. VAL is the value to set the memory
28591 with. LENGTH is the number of bytes to set. ALIGN is the
28592 alignment of the destination memory in bytes. UNALIGNED_P
28593 is TRUE if we can only set the memory with instructions
28594 meeting alignment requirements. USE_STRD_P is TRUE if we
28595 can use strd to set the memory. */
28596 static bool
28597 arm_block_set_non_vect_profit_p (rtx val,
28598 unsigned HOST_WIDE_INT length,
28599 unsigned HOST_WIDE_INT align,
28600 bool unaligned_p, bool use_strd_p)
28602 int num = 0;
28603 /* For leftovers of 0-7 bytes, we can set the memory block using
28604 strb/strh/str with the minimum number of instructions. */
28605 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28607 if (unaligned_p)
28609 num = arm_const_inline_cost (SET, val);
28610 num += length / align + length % align;
28612 else if (use_strd_p)
28614 num = arm_const_double_inline_cost (val);
28615 num += (length >> 3) + leftover[length & 7];
28617 else
28619 num = arm_const_inline_cost (SET, val);
28620 num += (length >> 2) + leftover[length & 3];
28623 /* We may be able to combine last pair STRH/STRB into a single STR
28624 by shifting one byte back. */
28625 if (unaligned_access && length > 3 && (length & 3) == 3)
28626 num--;
28628 return (num <= arm_block_set_max_insns ());
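/* Worked example (illustrative): for LENGTH == 15 with 4-byte alignment on
   the plain str path, num is the cost of loading the constant plus
   (15 >> 2) == 3 word stores plus leftover[15 & 3] == 2 trailing stores;
   when unaligned access is allowed the trailing STRH/STRB pair is merged
   into one STR, saving one more instruction before the comparison with
   arm_block_set_max_insns ().  */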
28631 /* Return TRUE if it's profitable to set a block of memory for the
28632 vectorized case. LENGTH is the number of bytes to set.
28633 ALIGN is the alignment of destination memory in bytes.
28634 MODE is the vector mode used to set the memory. */
28635 static bool
28636 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28637 unsigned HOST_WIDE_INT align,
28638 machine_mode mode)
28640 int num;
28641 bool unaligned_p = ((align & 3) != 0);
28642 unsigned int nelt = GET_MODE_NUNITS (mode);
28644 /* Instruction loading constant value. */
28645 num = 1;
28646 /* Instructions storing the memory. */
28647 num += (length + nelt - 1) / nelt;
28648 /* Instructions adjusting the address expression. We only need to
28649 adjust the address expression if the destination is 4-byte aligned
28650 and the leftover bytes can only be stored by a misaligned store instruction. */
28651 if (!unaligned_p && (length & 3) != 0)
28652 num++;
28654 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28655 if (!unaligned_p && mode == V16QImode)
28656 num--;
28658 return (num <= arm_block_set_max_insns ());
28661 /* Set a block of memory using vectorization instructions for the
28662 unaligned case. We fill the first LENGTH bytes of the memory
28663 area starting from DSTBASE with byte constant VALUE. ALIGN is
28664 the alignment requirement of memory. Return TRUE if succeeded. */
28665 static bool
28666 arm_block_set_unaligned_vect (rtx dstbase,
28667 unsigned HOST_WIDE_INT length,
28668 unsigned HOST_WIDE_INT value,
28669 unsigned HOST_WIDE_INT align)
28671 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28672 rtx dst, mem;
28673 rtx val_elt, val_vec, reg;
28674 rtx rval[MAX_VECT_LEN];
28675 rtx (*gen_func) (rtx, rtx);
28676 machine_mode mode;
28677 unsigned HOST_WIDE_INT v = value;
28679 gcc_assert ((align & 0x3) != 0);
28680 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28681 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28682 if (length >= nelt_v16)
28684 mode = V16QImode;
28685 gen_func = gen_movmisalignv16qi;
28687 else
28689 mode = V8QImode;
28690 gen_func = gen_movmisalignv8qi;
28692 nelt_mode = GET_MODE_NUNITS (mode);
28693 gcc_assert (length >= nelt_mode);
28694 /* Skip if it isn't profitable. */
28695 if (!arm_block_set_vect_profit_p (length, align, mode))
28696 return false;
28698 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28699 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28701 v = sext_hwi (v, BITS_PER_WORD);
28702 val_elt = GEN_INT (v);
28703 for (j = 0; j < nelt_mode; j++)
28704 rval[j] = val_elt;
28706 reg = gen_reg_rtx (mode);
28707 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28708 /* Emit instruction loading the constant value. */
28709 emit_move_insn (reg, val_vec);
28711 /* Handle nelt_mode bytes in a vector. */
28712 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28714 emit_insn ((*gen_func) (mem, reg));
28715 if (i + 2 * nelt_mode <= length)
28716 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28719 /* If there are at least nelt_v8 bytes leftover, we must be in
28720 V16QI mode. */
28721 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28723 /* Handle (8, 16) bytes leftover. */
28724 if (i + nelt_v8 < length)
28726 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28727 /* We are shifting bytes back, set the alignment accordingly. */
28728 if ((length & 1) != 0 && align >= 2)
28729 set_mem_align (mem, BITS_PER_UNIT);
28731 emit_insn (gen_movmisalignv16qi (mem, reg));
28733 /* Handle (0, 8] bytes leftover. */
28734 else if (i < length && i + nelt_v8 >= length)
28736 if (mode == V16QImode)
28738 reg = gen_lowpart (V8QImode, reg);
28739 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28741 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28742 + (nelt_mode - nelt_v8))));
28743 /* We are shifting bytes back, set the alignment accordingly. */
28744 if ((length & 1) != 0 && align >= 2)
28745 set_mem_align (mem, BITS_PER_UNIT);
28747 emit_insn (gen_movmisalignv8qi (mem, reg));
28750 return true;
28753 /* Set a block of memory using vectorization instructions for the
28754 aligned case. We fill the first LENGTH bytes of the memory area
28755 starting from DSTBASE with byte constant VALUE. ALIGN is the
28756 alignment requirement of memory. Return TRUE if succeeded. */
28757 static bool
28758 arm_block_set_aligned_vect (rtx dstbase,
28759 unsigned HOST_WIDE_INT length,
28760 unsigned HOST_WIDE_INT value,
28761 unsigned HOST_WIDE_INT align)
28763 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28764 rtx dst, addr, mem;
28765 rtx val_elt, val_vec, reg;
28766 rtx rval[MAX_VECT_LEN];
28767 machine_mode mode;
28768 unsigned HOST_WIDE_INT v = value;
28770 gcc_assert ((align & 0x3) == 0);
28771 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28772 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28773 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28774 mode = V16QImode;
28775 else
28776 mode = V8QImode;
28778 nelt_mode = GET_MODE_NUNITS (mode);
28779 gcc_assert (length >= nelt_mode);
28780 /* Skip if it isn't profitable. */
28781 if (!arm_block_set_vect_profit_p (length, align, mode))
28782 return false;
28784 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28786 v = sext_hwi (v, BITS_PER_WORD);
28787 val_elt = GEN_INT (v);
28788 for (j = 0; j < nelt_mode; j++)
28789 rval[j] = val_elt;
28791 reg = gen_reg_rtx (mode);
28792 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28793 /* Emit instruction loading the constant value. */
28794 emit_move_insn (reg, val_vec);
28796 i = 0;
28797 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28798 if (mode == V16QImode)
28800 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28801 emit_insn (gen_movmisalignv16qi (mem, reg));
28802 i += nelt_mode;
28803 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28804 if (i + nelt_v8 < length && i + nelt_v16 > length)
28806 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28807 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28808 /* We are shifting bytes back, set the alignment accordingly. */
28809 if ((length & 0x3) == 0)
28810 set_mem_align (mem, BITS_PER_UNIT * 4);
28811 else if ((length & 0x1) == 0)
28812 set_mem_align (mem, BITS_PER_UNIT * 2);
28813 else
28814 set_mem_align (mem, BITS_PER_UNIT);
28816 emit_insn (gen_movmisalignv16qi (mem, reg));
28817 return true;
28819 /* Fall through for bytes leftover. */
28820 mode = V8QImode;
28821 nelt_mode = GET_MODE_NUNITS (mode);
28822 reg = gen_lowpart (V8QImode, reg);
28825 /* Handle 8 bytes in a vector. */
28826 for (; (i + nelt_mode <= length); i += nelt_mode)
28828 addr = plus_constant (Pmode, dst, i);
28829 mem = adjust_automodify_address (dstbase, mode, addr, i);
28830 emit_move_insn (mem, reg);
28833 /* Handle single word leftover by shifting 4 bytes back. We can
28834 use aligned access for this case. */
28835 if (i + UNITS_PER_WORD == length)
28837 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28838 mem = adjust_automodify_address (dstbase, mode,
28839 addr, i - UNITS_PER_WORD);
28840 /* We are shifting 4 bytes back, set the alignment accordingly. */
28841 if (align > UNITS_PER_WORD)
28842 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28844 emit_move_insn (mem, reg);
28846 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28847 We have to use unaligned access for this case. */
28848 else if (i < length)
28850 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28851 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28852 /* We are shifting bytes back, set the alignment accordingly. */
28853 if ((length & 1) == 0)
28854 set_mem_align (mem, BITS_PER_UNIT * 2);
28855 else
28856 set_mem_align (mem, BITS_PER_UNIT);
28858 emit_insn (gen_movmisalignv8qi (mem, reg));
28861 return true;
28864 /* Set a block of memory using plain strh/strb instructions, only
28865 using instructions allowed by ALIGN on the processor. We fill the
28866 first LENGTH bytes of the memory area starting from DSTBASE
28867 with byte constant VALUE. ALIGN is the alignment requirement
28868 of memory. */
28869 static bool
28870 arm_block_set_unaligned_non_vect (rtx dstbase,
28871 unsigned HOST_WIDE_INT length,
28872 unsigned HOST_WIDE_INT value,
28873 unsigned HOST_WIDE_INT align)
28875 unsigned int i;
28876 rtx dst, addr, mem;
28877 rtx val_exp, val_reg, reg;
28878 machine_mode mode;
28879 HOST_WIDE_INT v = value;
28881 gcc_assert (align == 1 || align == 2);
28883 if (align == 2)
28884 v |= (value << BITS_PER_UNIT);
28886 v = sext_hwi (v, BITS_PER_WORD);
28887 val_exp = GEN_INT (v);
28888 /* Skip if it isn't profitable. */
28889 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28890 align, true, false))
28891 return false;
28893 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28894 mode = (align == 2 ? HImode : QImode);
28895 val_reg = force_reg (SImode, val_exp);
28896 reg = gen_lowpart (mode, val_reg);
28898 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28900 addr = plus_constant (Pmode, dst, i);
28901 mem = adjust_automodify_address (dstbase, mode, addr, i);
28902 emit_move_insn (mem, reg);
28905 /* Handle single byte leftover. */
28906 if (i + 1 == length)
28908 reg = gen_lowpart (QImode, val_reg);
28909 addr = plus_constant (Pmode, dst, i);
28910 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28911 emit_move_insn (mem, reg);
28912 i++;
28915 gcc_assert (i == length);
28916 return true;
28919 /* Set a block of memory using plain strd/str/strh/strb instructions,
28920 to permit unaligned copies on processors which support unaligned
28921 semantics for those instructions. We fill the first LENGTH bytes
28922 of the memory area starting from DSTBASE with byte constant VALUE.
28923 ALIGN is the alignment requirement of memory. */
28924 static bool
28925 arm_block_set_aligned_non_vect (rtx dstbase,
28926 unsigned HOST_WIDE_INT length,
28927 unsigned HOST_WIDE_INT value,
28928 unsigned HOST_WIDE_INT align)
28930 unsigned int i;
28931 rtx dst, addr, mem;
28932 rtx val_exp, val_reg, reg;
28933 unsigned HOST_WIDE_INT v;
28934 bool use_strd_p;
28936 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28937 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
28939 v = (value | (value << 8) | (value << 16) | (value << 24));
28940 if (length < UNITS_PER_WORD)
28941 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
28943 if (use_strd_p)
28944 v |= (v << BITS_PER_WORD);
28945 else
28946 v = sext_hwi (v, BITS_PER_WORD);
28948 val_exp = GEN_INT (v);
28949 /* Skip if it isn't profitable. */
28950 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28951 align, false, use_strd_p))
28953 if (!use_strd_p)
28954 return false;
28956 /* Try without strd. */
28957 v = (v >> BITS_PER_WORD);
28958 v = sext_hwi (v, BITS_PER_WORD);
28959 val_exp = GEN_INT (v);
28960 use_strd_p = false;
28961 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28962 align, false, use_strd_p))
28963 return false;
28966 i = 0;
28967 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28968 /* Handle double words using strd if possible. */
28969 if (use_strd_p)
28971 val_reg = force_reg (DImode, val_exp);
28972 reg = val_reg;
28973 for (; (i + 8 <= length); i += 8)
28975 addr = plus_constant (Pmode, dst, i);
28976 mem = adjust_automodify_address (dstbase, DImode, addr, i);
28977 emit_move_insn (mem, reg);
28980 else
28981 val_reg = force_reg (SImode, val_exp);
28983 /* Handle words. */
28984 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
28985 for (; (i + 4 <= length); i += 4)
28987 addr = plus_constant (Pmode, dst, i);
28988 mem = adjust_automodify_address (dstbase, SImode, addr, i);
28989 if ((align & 3) == 0)
28990 emit_move_insn (mem, reg);
28991 else
28992 emit_insn (gen_unaligned_storesi (mem, reg));
28995 /* Merge last pair of STRH and STRB into a STR if possible. */
28996 if (unaligned_access && i > 0 && (i + 3) == length)
28998 addr = plus_constant (Pmode, dst, i - 1);
28999 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29000 /* We are shifting one byte back, set the alignment accordingly. */
29001 if ((align & 1) == 0)
29002 set_mem_align (mem, BITS_PER_UNIT);
29004 /* Most likely this is an unaligned access, and we can't tell at
29005 compilation time. */
29006 emit_insn (gen_unaligned_storesi (mem, reg));
29007 return true;
29010 /* Handle half word leftover. */
29011 if (i + 2 <= length)
29013 reg = gen_lowpart (HImode, val_reg);
29014 addr = plus_constant (Pmode, dst, i);
29015 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29016 if ((align & 1) == 0)
29017 emit_move_insn (mem, reg);
29018 else
29019 emit_insn (gen_unaligned_storehi (mem, reg));
29021 i += 2;
29024 /* Handle single byte leftover. */
29025 if (i + 1 == length)
29027 reg = gen_lowpart (QImode, val_reg);
29028 addr = plus_constant (Pmode, dst, i);
29029 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29030 emit_move_insn (mem, reg);
29033 return true;
29036 /* Set a block of memory using vectorization instructions for both
29037 aligned and unaligned cases. We fill the first LENGTH bytes of
29038 the memory area starting from DSTBASE with byte constant VALUE.
29039 ALIGN is the alignment requirement of memory. */
29040 static bool
29041 arm_block_set_vect (rtx dstbase,
29042 unsigned HOST_WIDE_INT length,
29043 unsigned HOST_WIDE_INT value,
29044 unsigned HOST_WIDE_INT align)
29046 /* Check whether we need to use unaligned store instruction. */
29047 if (((align & 3) != 0 || (length & 3) != 0)
29048 /* Check whether unaligned store instruction is available. */
29049 && (!unaligned_access || BYTES_BIG_ENDIAN))
29050 return false;
29052 if ((align & 3) == 0)
29053 return arm_block_set_aligned_vect (dstbase, length, value, align);
29054 else
29055 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29058 /* Expand a string store operation. First we try to do it using
29059 vectorization instructions, then try with ARM unaligned access and
29060 double-word store if profitable. OPERANDS[0] is the destination,
29061 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29062 initialize the memory, OPERANDS[3] is the known alignment of the
29063 destination. */
29064 bool
29065 arm_gen_setmem (rtx *operands)
29067 rtx dstbase = operands[0];
29068 unsigned HOST_WIDE_INT length;
29069 unsigned HOST_WIDE_INT value;
29070 unsigned HOST_WIDE_INT align;
29072 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29073 return false;
29075 length = UINTVAL (operands[1]);
29076 if (length > 64)
29077 return false;
29079 value = (UINTVAL (operands[2]) & 0xFF);
29080 align = UINTVAL (operands[3]);
29081 if (TARGET_NEON && length >= 8
29082 && current_tune->string_ops_prefer_neon
29083 && arm_block_set_vect (dstbase, length, value, align))
29084 return true;
29086 if (!unaligned_access && (align & 3) != 0)
29087 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29089 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
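/* Illustrative sketch (not part of arm.c): a memset with a constant byte
   value and a constant length of at most 64 bytes, such as the call below,
   is the kind of block set arm_gen_setmem above is asked to open-code.  */
#include <string.h>

static void
clear_header (unsigned char *buf)
{
  memset (buf, 0, 24);   /* constant value and length <= 64 bytes */
}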
29093 static bool
29094 arm_macro_fusion_p (void)
29096 return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
29100 static bool
29101 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29103 rtx set_dest;
29104 rtx prev_set = single_set (prev);
29105 rtx curr_set = single_set (curr);
29107 if (!prev_set
29108 || !curr_set)
29109 return false;
29111 if (any_condjump_p (curr))
29112 return false;
29114 if (!arm_macro_fusion_p ())
29115 return false;
29117 if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
29119 /* We are trying to fuse
29120 movw imm / movt imm
29121 instructions as a group that gets scheduled together. */
29123 set_dest = SET_DEST (curr_set);
29125 if (GET_MODE (set_dest) != SImode)
29126 return false;
29128 /* We are trying to match:
29129 prev (movw) == (set (reg r0) (const_int imm16))
29130 curr (movt) == (set (zero_extract (reg r0)
29131 (const_int 16)
29132 (const_int 16))
29133 (const_int imm16_1))
29135 prev (movw) == (set (reg r1)
29136 (high (symbol_ref ("SYM"))))
29137 curr (movt) == (set (reg r0)
29138 (lo_sum (reg r1)
29139 (symbol_ref ("SYM")))) */
29140 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29142 if (CONST_INT_P (SET_SRC (curr_set))
29143 && CONST_INT_P (SET_SRC (prev_set))
29144 && REG_P (XEXP (set_dest, 0))
29145 && REG_P (SET_DEST (prev_set))
29146 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29147 return true;
29149 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29150 && REG_P (SET_DEST (curr_set))
29151 && REG_P (SET_DEST (prev_set))
29152 && GET_CODE (SET_SRC (prev_set)) == HIGH
29153 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29154 return true;
29156 return false;
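/* Illustrative sketch (not part of arm.c; `counter' is a hypothetical
   symbol): on a core with MOVW/MOVT, taking the address of a global
   produces the high/lo_sum pair described above, which the fusion hook
   keeps adjacent for the scheduler when ARM_FUSE_MOVW_MOVT is enabled.  */
extern int counter;

static int *
address_of_counter (void)
{
  return &counter;   /* movw rN, #:lower16:counter ; movt rN, #:upper16:counter */
}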
29159 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29161 static unsigned HOST_WIDE_INT
29162 arm_asan_shadow_offset (void)
29164 return (unsigned HOST_WIDE_INT) 1 << 29;
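/* Illustrative sketch (assumes the generic ASan mapping with shadow scale 3;
   not part of arm.c): the instrumentation computes a shadow address as
   (addr >> 3) plus the offset returned above, i.e. 1 << 29 on ARM.  */
static unsigned int
asan_shadow_addr (unsigned int addr)
{
  return (addr >> 3) + (1u << 29);
}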
29168 /* This is a temporary fix for PR60655. Ideally we need
29169 to handle most of these cases in the generic part but
29170 currently we reject minus (..) (sym_ref). We try to
29171 ameliorate the case with minus (sym_ref1) (sym_ref2)
29172 where they are in the same section. */
29174 static bool
29175 arm_const_not_ok_for_debug_p (rtx p)
29177 tree decl_op0 = NULL;
29178 tree decl_op1 = NULL;
29180 if (GET_CODE (p) == MINUS)
29182 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29184 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29185 if (decl_op1
29186 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29187 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29189 if ((TREE_CODE (decl_op1) == VAR_DECL
29190 || TREE_CODE (decl_op1) == CONST_DECL)
29191 && (TREE_CODE (decl_op0) == VAR_DECL
29192 || TREE_CODE (decl_op0) == CONST_DECL))
29193 return (get_variable_section (decl_op1, false)
29194 != get_variable_section (decl_op0, false));
29196 if (TREE_CODE (decl_op1) == LABEL_DECL
29197 && TREE_CODE (decl_op0) == LABEL_DECL)
29198 return (DECL_CONTEXT (decl_op1)
29199 != DECL_CONTEXT (decl_op0));
29202 return true;
29206 return false;
29209 /* Return TRUE if X is a reference to a value in a constant pool. */
29210 extern bool
29211 arm_is_constant_pool_ref (rtx x)
29213 return (MEM_P (x)
29214 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29215 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29218 /* If MEM is in the form of [base+offset], extract the two parts
29219 of the address into BASE and OFFSET; otherwise return false
29220 after clearing BASE and OFFSET. */
29222 static bool
29223 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29225 rtx addr;
29227 gcc_assert (MEM_P (mem));
29229 addr = XEXP (mem, 0);
29231 /* Strip off const from addresses like (const (addr)). */
29232 if (GET_CODE (addr) == CONST)
29233 addr = XEXP (addr, 0);
29235 if (GET_CODE (addr) == REG)
29237 *base = addr;
29238 *offset = const0_rtx;
29239 return true;
29242 if (GET_CODE (addr) == PLUS
29243 && GET_CODE (XEXP (addr, 0)) == REG
29244 && CONST_INT_P (XEXP (addr, 1)))
29246 *base = XEXP (addr, 0);
29247 *offset = XEXP (addr, 1);
29248 return true;
29251 *base = NULL_RTX;
29252 *offset = NULL_RTX;
29254 return false;
29257 /* If INSN is a load or store whose address is in the form [base+offset],
29258 extract the two parts into BASE and OFFSET. IS_LOAD is set
29259 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29260 otherwise return FALSE. */
29262 static bool
29263 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29265 rtx x, dest, src;
29267 gcc_assert (INSN_P (insn));
29268 x = PATTERN (insn);
29269 if (GET_CODE (x) != SET)
29270 return false;
29272 src = SET_SRC (x);
29273 dest = SET_DEST (x);
29274 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29276 *is_load = false;
29277 extract_base_offset_in_addr (dest, base, offset);
29279 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29281 *is_load = true;
29282 extract_base_offset_in_addr (src, base, offset);
29284 else
29285 return false;
29287 return (*base != NULL_RTX && *offset != NULL_RTX);
29290 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29292 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29293 and PRI are only calculated for these instructions. For other instructions,
29294 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29295 instruction fusion can be supported by returning different priorities.
29297 It's important that irrelevant instructions get the largest FUSION_PRI. */
29299 static void
29300 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29301 int *fusion_pri, int *pri)
29303 int tmp, off_val;
29304 bool is_load;
29305 rtx base, offset;
29307 gcc_assert (INSN_P (insn));
29309 tmp = max_pri - 1;
29310 if (!fusion_load_store (insn, &base, &offset, &is_load))
29312 *pri = tmp;
29313 *fusion_pri = tmp;
29314 return;
29317 /* Load goes first. */
29318 if (is_load)
29319 *fusion_pri = tmp - 1;
29320 else
29321 *fusion_pri = tmp - 2;
29323 tmp /= 2;
29325 /* INSN with smaller base register goes first. */
29326 tmp -= ((REGNO (base) & 0xff) << 20);
29328 /* INSN with smaller offset goes first. */
29329 off_val = (int)(INTVAL (offset));
29330 if (off_val >= 0)
29331 tmp -= (off_val & 0xfffff);
29332 else
29333 tmp += ((- off_val) & 0xfffff);
29335 *pri = tmp;
29336 return;
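/* Illustrative sketch (not part of arm.c): two loads from adjacent offsets
   off the same base register, as below, are given the same FUSION_PRI and
   offset-ordered PRI values by the hook above, so the scheduler keeps them
   next to each other and later passes can turn them into LDRD/LDM.  */
static int
sum_pair (const int *p)
{
  return p[0] + p[1];   /* loads from [p] and [p, #4] share the base */
}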
29338 #include "gt-arm.h"