[official-gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
 108 struct four_ints
 109 {
 110 int i[4];
 111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
 330 static const struct attribute_spec arm_attribute_table[] =
 331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
 359 multiple times. */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
 371 { NULL, 0, 0, false, false, false, NULL, false }
 372 };
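/* Illustrative only -- a sketch of how user code requests the attributes
   handled by the table above on a GCC/ARM target; the function names here
   are invented for the example and do not appear elsewhere in this file:

     extern void far_away (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void boot_stub (void) __attribute__ ((naked));
     int aapcs_fn (int) __attribute__ ((pcs ("aapcs")));

   Each use is checked by the matching handler registered above
   (arm_handle_isr_attribute, arm_handle_fndecl_attribute,
   arm_handle_pcs_attribute) before it influences code generation.  */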
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block
621 for a particular anchor is -4088 + 1 + 4095 bytes, which is
622 divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
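/* Worked out: with the two limits above, an anchor can address offsets in
   [-4088, +4095], i.e. 4088 + 4095 + 1 = 8184 bytes in total, and
   8184 = 8 * 1023, hence the "divisible by eight" remark.  */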
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
 766 /* Which floating point hardware to use. */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
 869 /* Nonzero if we should use Neon to handle 64-bit operations rather
870 than core registers. */
871 int prefer_neon_for_64bits = 0;
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool = false;
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 machine_mode output_memory_reference_mode;
881 /* The register number to be used for the PIC offset register. */
882 unsigned arm_pic_register = INVALID_REGNUM;
884 enum arm_pcs arm_pcs_default;
886 /* For an explanation of these variables, see final_prescan_insn below. */
887 int arm_ccfsm_state;
888 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
889 enum arm_cond_code arm_current_cc;
891 rtx arm_target_insn;
892 int arm_target_label;
893 /* The number of conditionally executed insns, including the current insn. */
894 int arm_condexec_count = 0;
895 /* A bitmask specifying the patterns for the IT block.
896 Zero means do not output an IT block before this insn. */
897 int arm_condexec_mask = 0;
898 /* The number of bits used in arm_condexec_mask. */
899 int arm_condexec_masklen = 0;
901 /* Nonzero if chip supports the ARMv8 CRC instructions. */
902 int arm_arch_crc = 0;
 904 /* Nonzero if the core has a very small, high-latency multiply unit. */
905 int arm_m_profile_small_mul = 0;
907 /* The condition codes of the ARM, and the inverse function. */
 908 static const char * const arm_condition_codes[] =
 909 {
 910 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 911 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 912 };
 914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 915 int arm_regs_in_sequence[] =
 916 {
 917 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 918 };
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 /* Initialization code. */
 929 struct processors
 930 {
 931 const char *const name;
 932 enum processor_type core;
 933 const char *arch;
 934 enum base_architecture base_arch;
 935 const unsigned long flags;
 936 const struct tune_params *const tune;
 937 };
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
942 prefetch_slots, \
943 l1_size, \
944 l1_line_size
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
 950 1, /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
 960 1, /* cond_not_taken_branch_cost. */
 961 };
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs =
970 /* ALU */
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* rev. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
990 /* MULT SImode */
992 COSTS_N_INSNS (3), /* simple. */
993 COSTS_N_INSNS (3), /* flag_setting. */
994 COSTS_N_INSNS (2), /* extend. */
995 COSTS_N_INSNS (3), /* add. */
996 COSTS_N_INSNS (2), /* extend_add. */
997 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
999 /* MULT DImode */
1001 0, /* simple (N/A). */
1002 0, /* flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* extend. */
1004 0, /* add (N/A). */
1005 COSTS_N_INSNS (4), /* extend_add. */
1006 0 /* idiv (N/A). */
1009 /* LD/ST */
1011 COSTS_N_INSNS (2), /* load. */
1012 COSTS_N_INSNS (2), /* load_sign_extend. */
1013 COSTS_N_INSNS (2), /* ldrd. */
1014 COSTS_N_INSNS (2), /* ldm_1st. */
1015 1, /* ldm_regs_per_insn_1st. */
1016 2, /* ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* loadf. */
1018 COSTS_N_INSNS (5), /* loadd. */
1019 COSTS_N_INSNS (1), /* load_unaligned. */
1020 COSTS_N_INSNS (2), /* store. */
1021 COSTS_N_INSNS (2), /* strd. */
1022 COSTS_N_INSNS (2), /* stm_1st. */
1023 1, /* stm_regs_per_insn_1st. */
1024 2, /* stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* storef. */
1026 COSTS_N_INSNS (1), /* stored. */
1027 COSTS_N_INSNS (1) /* store_unaligned. */
1030 /* FP SFmode */
1032 COSTS_N_INSNS (14), /* div. */
1033 COSTS_N_INSNS (4), /* mult. */
1034 COSTS_N_INSNS (7), /* mult_addsub. */
1035 COSTS_N_INSNS (30), /* fma. */
1036 COSTS_N_INSNS (3), /* addsub. */
1037 COSTS_N_INSNS (1), /* fpconst. */
1038 COSTS_N_INSNS (1), /* neg. */
1039 COSTS_N_INSNS (3), /* compare. */
1040 COSTS_N_INSNS (3), /* widen. */
1041 COSTS_N_INSNS (3), /* narrow. */
1042 COSTS_N_INSNS (3), /* toint. */
1043 COSTS_N_INSNS (3), /* fromint. */
1044 COSTS_N_INSNS (3) /* roundint. */
1046 /* FP DFmode */
1048 COSTS_N_INSNS (24), /* div. */
1049 COSTS_N_INSNS (5), /* mult. */
1050 COSTS_N_INSNS (8), /* mult_addsub. */
1051 COSTS_N_INSNS (30), /* fma. */
1052 COSTS_N_INSNS (3), /* addsub. */
1053 COSTS_N_INSNS (1), /* fpconst. */
1054 COSTS_N_INSNS (1), /* neg. */
1055 COSTS_N_INSNS (3), /* compare. */
1056 COSTS_N_INSNS (3), /* widen. */
1057 COSTS_N_INSNS (3), /* narrow. */
1058 COSTS_N_INSNS (3), /* toint. */
1059 COSTS_N_INSNS (3), /* fromint. */
1060 COSTS_N_INSNS (3) /* roundint. */
1063 /* Vector */
1065 COSTS_N_INSNS (1) /* alu. */
1069 const struct cpu_cost_table cortexa8_extra_costs =
1071 /* ALU */
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 0, /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 0, /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 0, /* log_shift_reg. */
1081 0, /* extend. */
1082 0, /* extend_arith. */
1083 0, /* bfi. */
1084 0, /* bfx. */
1085 0, /* clz. */
1086 0, /* rev. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1091 /* MULT SImode */
1093 COSTS_N_INSNS (1), /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1100 /* MULT DImode */
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (2), /* extend. */
1105 0, /* add (N/A). */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1110 /* LD/ST */
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (1), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (1), /* loadf. */
1119 COSTS_N_INSNS (1), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (1), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (1), /* storef. */
1127 COSTS_N_INSNS (1), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1131 /* FP SFmode */
1133 COSTS_N_INSNS (36), /* div. */
1134 COSTS_N_INSNS (11), /* mult. */
1135 COSTS_N_INSNS (20), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (9), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (6), /* compare. */
1141 COSTS_N_INSNS (4), /* widen. */
1142 COSTS_N_INSNS (4), /* narrow. */
1143 COSTS_N_INSNS (8), /* toint. */
1144 COSTS_N_INSNS (8), /* fromint. */
1145 COSTS_N_INSNS (8) /* roundint. */
1147 /* FP DFmode */
1149 COSTS_N_INSNS (64), /* div. */
1150 COSTS_N_INSNS (16), /* mult. */
1151 COSTS_N_INSNS (25), /* mult_addsub. */
1152 COSTS_N_INSNS (30), /* fma. */
1153 COSTS_N_INSNS (9), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (6), /* compare. */
1157 COSTS_N_INSNS (6), /* widen. */
1158 COSTS_N_INSNS (6), /* narrow. */
1159 COSTS_N_INSNS (8), /* toint. */
1160 COSTS_N_INSNS (8), /* fromint. */
1161 COSTS_N_INSNS (8) /* roundint. */
1164 /* Vector */
1166 COSTS_N_INSNS (1) /* alu. */
1170 const struct cpu_cost_table cortexa5_extra_costs =
1172 /* ALU */
1174 0, /* arith. */
1175 0, /* logical. */
1176 COSTS_N_INSNS (1), /* shift. */
1177 COSTS_N_INSNS (1), /* shift_reg. */
1178 COSTS_N_INSNS (1), /* arith_shift. */
1179 COSTS_N_INSNS (1), /* arith_shift_reg. */
1180 COSTS_N_INSNS (1), /* log_shift. */
1181 COSTS_N_INSNS (1), /* log_shift_reg. */
1182 COSTS_N_INSNS (1), /* extend. */
1183 COSTS_N_INSNS (1), /* extend_arith. */
1184 COSTS_N_INSNS (1), /* bfi. */
1185 COSTS_N_INSNS (1), /* bfx. */
1186 COSTS_N_INSNS (1), /* clz. */
1187 COSTS_N_INSNS (1), /* rev. */
1188 0, /* non_exec. */
1189 true /* non_exec_costs_exec. */
1193 /* MULT SImode */
1195 0, /* simple. */
1196 COSTS_N_INSNS (1), /* flag_setting. */
1197 COSTS_N_INSNS (1), /* extend. */
1198 COSTS_N_INSNS (1), /* add. */
1199 COSTS_N_INSNS (1), /* extend_add. */
1200 COSTS_N_INSNS (7) /* idiv. */
1202 /* MULT DImode */
1204 0, /* simple (N/A). */
1205 0, /* flag_setting (N/A). */
1206 COSTS_N_INSNS (1), /* extend. */
1207 0, /* add. */
1208 COSTS_N_INSNS (2), /* extend_add. */
1209 0 /* idiv (N/A). */
1212 /* LD/ST */
1214 COSTS_N_INSNS (1), /* load. */
1215 COSTS_N_INSNS (1), /* load_sign_extend. */
1216 COSTS_N_INSNS (6), /* ldrd. */
1217 COSTS_N_INSNS (1), /* ldm_1st. */
1218 1, /* ldm_regs_per_insn_1st. */
1219 2, /* ldm_regs_per_insn_subsequent. */
1220 COSTS_N_INSNS (2), /* loadf. */
1221 COSTS_N_INSNS (4), /* loadd. */
1222 COSTS_N_INSNS (1), /* load_unaligned. */
1223 COSTS_N_INSNS (1), /* store. */
1224 COSTS_N_INSNS (3), /* strd. */
1225 COSTS_N_INSNS (1), /* stm_1st. */
1226 1, /* stm_regs_per_insn_1st. */
1227 2, /* stm_regs_per_insn_subsequent. */
1228 COSTS_N_INSNS (2), /* storef. */
1229 COSTS_N_INSNS (2), /* stored. */
1230 COSTS_N_INSNS (1) /* store_unaligned. */
1233 /* FP SFmode */
1235 COSTS_N_INSNS (15), /* div. */
1236 COSTS_N_INSNS (3), /* mult. */
1237 COSTS_N_INSNS (7), /* mult_addsub. */
1238 COSTS_N_INSNS (7), /* fma. */
1239 COSTS_N_INSNS (3), /* addsub. */
1240 COSTS_N_INSNS (3), /* fpconst. */
1241 COSTS_N_INSNS (3), /* neg. */
1242 COSTS_N_INSNS (3), /* compare. */
1243 COSTS_N_INSNS (3), /* widen. */
1244 COSTS_N_INSNS (3), /* narrow. */
1245 COSTS_N_INSNS (3), /* toint. */
1246 COSTS_N_INSNS (3), /* fromint. */
1247 COSTS_N_INSNS (3) /* roundint. */
1249 /* FP DFmode */
1251 COSTS_N_INSNS (30), /* div. */
1252 COSTS_N_INSNS (6), /* mult. */
1253 COSTS_N_INSNS (10), /* mult_addsub. */
1254 COSTS_N_INSNS (7), /* fma. */
1255 COSTS_N_INSNS (3), /* addsub. */
1256 COSTS_N_INSNS (3), /* fpconst. */
1257 COSTS_N_INSNS (3), /* neg. */
1258 COSTS_N_INSNS (3), /* compare. */
1259 COSTS_N_INSNS (3), /* widen. */
1260 COSTS_N_INSNS (3), /* narrow. */
1261 COSTS_N_INSNS (3), /* toint. */
1262 COSTS_N_INSNS (3), /* fromint. */
1263 COSTS_N_INSNS (3) /* roundint. */
1266 /* Vector */
1268 COSTS_N_INSNS (1) /* alu. */
1273 const struct cpu_cost_table cortexa7_extra_costs =
1275 /* ALU */
1277 0, /* arith. */
1278 0, /* logical. */
1279 COSTS_N_INSNS (1), /* shift. */
1280 COSTS_N_INSNS (1), /* shift_reg. */
1281 COSTS_N_INSNS (1), /* arith_shift. */
1282 COSTS_N_INSNS (1), /* arith_shift_reg. */
1283 COSTS_N_INSNS (1), /* log_shift. */
1284 COSTS_N_INSNS (1), /* log_shift_reg. */
1285 COSTS_N_INSNS (1), /* extend. */
1286 COSTS_N_INSNS (1), /* extend_arith. */
1287 COSTS_N_INSNS (1), /* bfi. */
1288 COSTS_N_INSNS (1), /* bfx. */
1289 COSTS_N_INSNS (1), /* clz. */
1290 COSTS_N_INSNS (1), /* rev. */
1291 0, /* non_exec. */
1292 true /* non_exec_costs_exec. */
1296 /* MULT SImode */
1298 0, /* simple. */
1299 COSTS_N_INSNS (1), /* flag_setting. */
1300 COSTS_N_INSNS (1), /* extend. */
1301 COSTS_N_INSNS (1), /* add. */
1302 COSTS_N_INSNS (1), /* extend_add. */
1303 COSTS_N_INSNS (7) /* idiv. */
1305 /* MULT DImode */
1307 0, /* simple (N/A). */
1308 0, /* flag_setting (N/A). */
1309 COSTS_N_INSNS (1), /* extend. */
1310 0, /* add. */
1311 COSTS_N_INSNS (2), /* extend_add. */
1312 0 /* idiv (N/A). */
1315 /* LD/ST */
1317 COSTS_N_INSNS (1), /* load. */
1318 COSTS_N_INSNS (1), /* load_sign_extend. */
1319 COSTS_N_INSNS (3), /* ldrd. */
1320 COSTS_N_INSNS (1), /* ldm_1st. */
1321 1, /* ldm_regs_per_insn_1st. */
1322 2, /* ldm_regs_per_insn_subsequent. */
1323 COSTS_N_INSNS (2), /* loadf. */
1324 COSTS_N_INSNS (2), /* loadd. */
1325 COSTS_N_INSNS (1), /* load_unaligned. */
1326 COSTS_N_INSNS (1), /* store. */
1327 COSTS_N_INSNS (3), /* strd. */
1328 COSTS_N_INSNS (1), /* stm_1st. */
1329 1, /* stm_regs_per_insn_1st. */
1330 2, /* stm_regs_per_insn_subsequent. */
1331 COSTS_N_INSNS (2), /* storef. */
1332 COSTS_N_INSNS (2), /* stored. */
1333 COSTS_N_INSNS (1) /* store_unaligned. */
1336 /* FP SFmode */
1338 COSTS_N_INSNS (15), /* div. */
1339 COSTS_N_INSNS (3), /* mult. */
1340 COSTS_N_INSNS (7), /* mult_addsub. */
1341 COSTS_N_INSNS (7), /* fma. */
1342 COSTS_N_INSNS (3), /* addsub. */
1343 COSTS_N_INSNS (3), /* fpconst. */
1344 COSTS_N_INSNS (3), /* neg. */
1345 COSTS_N_INSNS (3), /* compare. */
1346 COSTS_N_INSNS (3), /* widen. */
1347 COSTS_N_INSNS (3), /* narrow. */
1348 COSTS_N_INSNS (3), /* toint. */
1349 COSTS_N_INSNS (3), /* fromint. */
1350 COSTS_N_INSNS (3) /* roundint. */
1352 /* FP DFmode */
1354 COSTS_N_INSNS (30), /* div. */
1355 COSTS_N_INSNS (6), /* mult. */
1356 COSTS_N_INSNS (10), /* mult_addsub. */
1357 COSTS_N_INSNS (7), /* fma. */
1358 COSTS_N_INSNS (3), /* addsub. */
1359 COSTS_N_INSNS (3), /* fpconst. */
1360 COSTS_N_INSNS (3), /* neg. */
1361 COSTS_N_INSNS (3), /* compare. */
1362 COSTS_N_INSNS (3), /* widen. */
1363 COSTS_N_INSNS (3), /* narrow. */
1364 COSTS_N_INSNS (3), /* toint. */
1365 COSTS_N_INSNS (3), /* fromint. */
1366 COSTS_N_INSNS (3) /* roundint. */
1369 /* Vector */
1371 COSTS_N_INSNS (1) /* alu. */
1375 const struct cpu_cost_table cortexa12_extra_costs =
1377 /* ALU */
1379 0, /* arith. */
1380 0, /* logical. */
1381 0, /* shift. */
1382 COSTS_N_INSNS (1), /* shift_reg. */
1383 COSTS_N_INSNS (1), /* arith_shift. */
1384 COSTS_N_INSNS (1), /* arith_shift_reg. */
1385 COSTS_N_INSNS (1), /* log_shift. */
1386 COSTS_N_INSNS (1), /* log_shift_reg. */
1387 0, /* extend. */
1388 COSTS_N_INSNS (1), /* extend_arith. */
1389 0, /* bfi. */
1390 COSTS_N_INSNS (1), /* bfx. */
1391 COSTS_N_INSNS (1), /* clz. */
1392 COSTS_N_INSNS (1), /* rev. */
1393 0, /* non_exec. */
1394 true /* non_exec_costs_exec. */
1396 /* MULT SImode */
1399 COSTS_N_INSNS (2), /* simple. */
1400 COSTS_N_INSNS (3), /* flag_setting. */
1401 COSTS_N_INSNS (2), /* extend. */
1402 COSTS_N_INSNS (3), /* add. */
1403 COSTS_N_INSNS (2), /* extend_add. */
1404 COSTS_N_INSNS (18) /* idiv. */
1406 /* MULT DImode */
1408 0, /* simple (N/A). */
1409 0, /* flag_setting (N/A). */
1410 COSTS_N_INSNS (3), /* extend. */
1411 0, /* add (N/A). */
1412 COSTS_N_INSNS (3), /* extend_add. */
1413 0 /* idiv (N/A). */
1416 /* LD/ST */
1418 COSTS_N_INSNS (3), /* load. */
1419 COSTS_N_INSNS (3), /* load_sign_extend. */
1420 COSTS_N_INSNS (3), /* ldrd. */
1421 COSTS_N_INSNS (3), /* ldm_1st. */
1422 1, /* ldm_regs_per_insn_1st. */
1423 2, /* ldm_regs_per_insn_subsequent. */
1424 COSTS_N_INSNS (3), /* loadf. */
1425 COSTS_N_INSNS (3), /* loadd. */
1426 0, /* load_unaligned. */
1427 0, /* store. */
1428 0, /* strd. */
1429 0, /* stm_1st. */
1430 1, /* stm_regs_per_insn_1st. */
1431 2, /* stm_regs_per_insn_subsequent. */
1432 COSTS_N_INSNS (2), /* storef. */
1433 COSTS_N_INSNS (2), /* stored. */
1434 0 /* store_unaligned. */
1437 /* FP SFmode */
1439 COSTS_N_INSNS (17), /* div. */
1440 COSTS_N_INSNS (4), /* mult. */
1441 COSTS_N_INSNS (8), /* mult_addsub. */
1442 COSTS_N_INSNS (8), /* fma. */
1443 COSTS_N_INSNS (4), /* addsub. */
1444 COSTS_N_INSNS (2), /* fpconst. */
1445 COSTS_N_INSNS (2), /* neg. */
1446 COSTS_N_INSNS (2), /* compare. */
1447 COSTS_N_INSNS (4), /* widen. */
1448 COSTS_N_INSNS (4), /* narrow. */
1449 COSTS_N_INSNS (4), /* toint. */
1450 COSTS_N_INSNS (4), /* fromint. */
1451 COSTS_N_INSNS (4) /* roundint. */
1453 /* FP DFmode */
1455 COSTS_N_INSNS (31), /* div. */
1456 COSTS_N_INSNS (4), /* mult. */
1457 COSTS_N_INSNS (8), /* mult_addsub. */
1458 COSTS_N_INSNS (8), /* fma. */
1459 COSTS_N_INSNS (4), /* addsub. */
1460 COSTS_N_INSNS (2), /* fpconst. */
1461 COSTS_N_INSNS (2), /* neg. */
1462 COSTS_N_INSNS (2), /* compare. */
1463 COSTS_N_INSNS (4), /* widen. */
1464 COSTS_N_INSNS (4), /* narrow. */
1465 COSTS_N_INSNS (4), /* toint. */
1466 COSTS_N_INSNS (4), /* fromint. */
1467 COSTS_N_INSNS (4) /* roundint. */
1470 /* Vector */
1472 COSTS_N_INSNS (1) /* alu. */
1476 const struct cpu_cost_table cortexa15_extra_costs =
1478 /* ALU */
1480 0, /* arith. */
1481 0, /* logical. */
1482 0, /* shift. */
1483 0, /* shift_reg. */
1484 COSTS_N_INSNS (1), /* arith_shift. */
1485 COSTS_N_INSNS (1), /* arith_shift_reg. */
1486 COSTS_N_INSNS (1), /* log_shift. */
1487 COSTS_N_INSNS (1), /* log_shift_reg. */
1488 0, /* extend. */
1489 COSTS_N_INSNS (1), /* extend_arith. */
1490 COSTS_N_INSNS (1), /* bfi. */
1491 0, /* bfx. */
1492 0, /* clz. */
1493 0, /* rev. */
1494 0, /* non_exec. */
1495 true /* non_exec_costs_exec. */
1497 /* MULT SImode */
1500 COSTS_N_INSNS (2), /* simple. */
1501 COSTS_N_INSNS (3), /* flag_setting. */
1502 COSTS_N_INSNS (2), /* extend. */
1503 COSTS_N_INSNS (2), /* add. */
1504 COSTS_N_INSNS (2), /* extend_add. */
1505 COSTS_N_INSNS (18) /* idiv. */
1507 /* MULT DImode */
1509 0, /* simple (N/A). */
1510 0, /* flag_setting (N/A). */
1511 COSTS_N_INSNS (3), /* extend. */
1512 0, /* add (N/A). */
1513 COSTS_N_INSNS (3), /* extend_add. */
1514 0 /* idiv (N/A). */
1517 /* LD/ST */
1519 COSTS_N_INSNS (3), /* load. */
1520 COSTS_N_INSNS (3), /* load_sign_extend. */
1521 COSTS_N_INSNS (3), /* ldrd. */
1522 COSTS_N_INSNS (4), /* ldm_1st. */
1523 1, /* ldm_regs_per_insn_1st. */
1524 2, /* ldm_regs_per_insn_subsequent. */
1525 COSTS_N_INSNS (4), /* loadf. */
1526 COSTS_N_INSNS (4), /* loadd. */
1527 0, /* load_unaligned. */
1528 0, /* store. */
1529 0, /* strd. */
1530 COSTS_N_INSNS (1), /* stm_1st. */
1531 1, /* stm_regs_per_insn_1st. */
1532 2, /* stm_regs_per_insn_subsequent. */
1533 0, /* storef. */
1534 0, /* stored. */
1535 0 /* store_unaligned. */
1538 /* FP SFmode */
1540 COSTS_N_INSNS (17), /* div. */
1541 COSTS_N_INSNS (4), /* mult. */
1542 COSTS_N_INSNS (8), /* mult_addsub. */
1543 COSTS_N_INSNS (8), /* fma. */
1544 COSTS_N_INSNS (4), /* addsub. */
1545 COSTS_N_INSNS (2), /* fpconst. */
1546 COSTS_N_INSNS (2), /* neg. */
1547 COSTS_N_INSNS (5), /* compare. */
1548 COSTS_N_INSNS (4), /* widen. */
1549 COSTS_N_INSNS (4), /* narrow. */
1550 COSTS_N_INSNS (4), /* toint. */
1551 COSTS_N_INSNS (4), /* fromint. */
1552 COSTS_N_INSNS (4) /* roundint. */
1554 /* FP DFmode */
1556 COSTS_N_INSNS (31), /* div. */
1557 COSTS_N_INSNS (4), /* mult. */
1558 COSTS_N_INSNS (8), /* mult_addsub. */
1559 COSTS_N_INSNS (8), /* fma. */
1560 COSTS_N_INSNS (4), /* addsub. */
1561 COSTS_N_INSNS (2), /* fpconst. */
1562 COSTS_N_INSNS (2), /* neg. */
1563 COSTS_N_INSNS (2), /* compare. */
1564 COSTS_N_INSNS (4), /* widen. */
1565 COSTS_N_INSNS (4), /* narrow. */
1566 COSTS_N_INSNS (4), /* toint. */
1567 COSTS_N_INSNS (4), /* fromint. */
1568 COSTS_N_INSNS (4) /* roundint. */
1571 /* Vector */
1573 COSTS_N_INSNS (1) /* alu. */
1577 const struct cpu_cost_table v7m_extra_costs =
1579 /* ALU */
1581 0, /* arith. */
1582 0, /* logical. */
1583 0, /* shift. */
1584 0, /* shift_reg. */
1585 0, /* arith_shift. */
1586 COSTS_N_INSNS (1), /* arith_shift_reg. */
1587 0, /* log_shift. */
1588 COSTS_N_INSNS (1), /* log_shift_reg. */
1589 0, /* extend. */
1590 COSTS_N_INSNS (1), /* extend_arith. */
1591 0, /* bfi. */
1592 0, /* bfx. */
1593 0, /* clz. */
1594 0, /* rev. */
1595 COSTS_N_INSNS (1), /* non_exec. */
1596 false /* non_exec_costs_exec. */
1599 /* MULT SImode */
1601 COSTS_N_INSNS (1), /* simple. */
1602 COSTS_N_INSNS (1), /* flag_setting. */
1603 COSTS_N_INSNS (2), /* extend. */
1604 COSTS_N_INSNS (1), /* add. */
1605 COSTS_N_INSNS (3), /* extend_add. */
1606 COSTS_N_INSNS (8) /* idiv. */
1608 /* MULT DImode */
1610 0, /* simple (N/A). */
1611 0, /* flag_setting (N/A). */
1612 COSTS_N_INSNS (2), /* extend. */
1613 0, /* add (N/A). */
1614 COSTS_N_INSNS (3), /* extend_add. */
1615 0 /* idiv (N/A). */
1618 /* LD/ST */
1620 COSTS_N_INSNS (2), /* load. */
1621 0, /* load_sign_extend. */
1622 COSTS_N_INSNS (3), /* ldrd. */
1623 COSTS_N_INSNS (2), /* ldm_1st. */
1624 1, /* ldm_regs_per_insn_1st. */
1625 1, /* ldm_regs_per_insn_subsequent. */
1626 COSTS_N_INSNS (2), /* loadf. */
1627 COSTS_N_INSNS (3), /* loadd. */
1628 COSTS_N_INSNS (1), /* load_unaligned. */
1629 COSTS_N_INSNS (2), /* store. */
1630 COSTS_N_INSNS (3), /* strd. */
1631 COSTS_N_INSNS (2), /* stm_1st. */
1632 1, /* stm_regs_per_insn_1st. */
1633 1, /* stm_regs_per_insn_subsequent. */
1634 COSTS_N_INSNS (2), /* storef. */
1635 COSTS_N_INSNS (3), /* stored. */
1636 COSTS_N_INSNS (1) /* store_unaligned. */
1639 /* FP SFmode */
1641 COSTS_N_INSNS (7), /* div. */
1642 COSTS_N_INSNS (2), /* mult. */
1643 COSTS_N_INSNS (5), /* mult_addsub. */
1644 COSTS_N_INSNS (3), /* fma. */
1645 COSTS_N_INSNS (1), /* addsub. */
1646 0, /* fpconst. */
1647 0, /* neg. */
1648 0, /* compare. */
1649 0, /* widen. */
1650 0, /* narrow. */
1651 0, /* toint. */
1652 0, /* fromint. */
1653 0 /* roundint. */
1655 /* FP DFmode */
1657 COSTS_N_INSNS (15), /* div. */
1658 COSTS_N_INSNS (5), /* mult. */
1659 COSTS_N_INSNS (7), /* mult_addsub. */
1660 COSTS_N_INSNS (7), /* fma. */
1661 COSTS_N_INSNS (3), /* addsub. */
1662 0, /* fpconst. */
1663 0, /* neg. */
1664 0, /* compare. */
1665 0, /* widen. */
1666 0, /* narrow. */
1667 0, /* toint. */
1668 0, /* fromint. */
1669 0 /* roundint. */
1672 /* Vector */
1674 COSTS_N_INSNS (1) /* alu. */
1678 #define ARM_FUSE_NOTHING (0)
1679 #define ARM_FUSE_MOVW_MOVT (1 << 0)
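/* Illustrative only -- the instruction pair this fusion flag is about,
   in ARM assembly (the symbol name is invented for the example):

     movw  r0, #:lower16:some_sym
     movt  r0, #:upper16:some_sym

   When a tuning enables ARM_FUSE_MOVW_MOVT, the scheduler hook
   aarch_macro_fusion_pair_p declared above tries to keep such a
   movw/movt pair adjacent so cores that can fuse them get the chance.  */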
1681 const struct tune_params arm_slowmul_tune =
1683 arm_slowmul_rtx_costs,
1684 NULL,
1685 NULL, /* Sched adj cost. */
1686 3, /* Constant limit. */
1687 5, /* Max cond insns. */
1688 ARM_PREFETCH_NOT_BENEFICIAL,
1689 true, /* Prefer constant pool. */
1690 arm_default_branch_cost,
1691 false, /* Prefer LDRD/STRD. */
1692 {true, true}, /* Prefer non short circuit. */
1693 &arm_default_vec_cost, /* Vectorizer costs. */
1694 false, /* Prefer Neon for 64-bits bitops. */
1695 false, false, /* Prefer 32-bit encodings. */
1696 false, /* Prefer Neon for stringops. */
1697 8, /* Maximum insns to inline memset. */
1698 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1699 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1702 const struct tune_params arm_fastmul_tune =
1704 arm_fastmul_rtx_costs,
1705 NULL,
1706 NULL, /* Sched adj cost. */
1707 1, /* Constant limit. */
1708 5, /* Max cond insns. */
1709 ARM_PREFETCH_NOT_BENEFICIAL,
1710 true, /* Prefer constant pool. */
1711 arm_default_branch_cost,
1712 false, /* Prefer LDRD/STRD. */
1713 {true, true}, /* Prefer non short circuit. */
1714 &arm_default_vec_cost, /* Vectorizer costs. */
1715 false, /* Prefer Neon for 64-bits bitops. */
1716 false, false, /* Prefer 32-bit encodings. */
1717 false, /* Prefer Neon for stringops. */
1718 8, /* Maximum insns to inline memset. */
1719 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1720 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1723 /* StrongARM has early execution of branches, so a sequence that is worth
1724 skipping is shorter. Set max_insns_skipped to a lower value. */
1726 const struct tune_params arm_strongarm_tune =
1728 arm_fastmul_rtx_costs,
1729 NULL,
1730 NULL, /* Sched adj cost. */
1731 1, /* Constant limit. */
1732 3, /* Max cond insns. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 true, /* Prefer constant pool. */
1735 arm_default_branch_cost,
1736 false, /* Prefer LDRD/STRD. */
1737 {true, true}, /* Prefer non short circuit. */
1738 &arm_default_vec_cost, /* Vectorizer costs. */
1739 false, /* Prefer Neon for 64-bits bitops. */
1740 false, false, /* Prefer 32-bit encodings. */
1741 false, /* Prefer Neon for stringops. */
1742 8, /* Maximum insns to inline memset. */
1743 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1744 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1747 const struct tune_params arm_xscale_tune =
1749 arm_xscale_rtx_costs,
1750 NULL,
1751 xscale_sched_adjust_cost,
1752 2, /* Constant limit. */
1753 3, /* Max cond insns. */
1754 ARM_PREFETCH_NOT_BENEFICIAL,
1755 true, /* Prefer constant pool. */
1756 arm_default_branch_cost,
1757 false, /* Prefer LDRD/STRD. */
1758 {true, true}, /* Prefer non short circuit. */
1759 &arm_default_vec_cost, /* Vectorizer costs. */
1760 false, /* Prefer Neon for 64-bits bitops. */
1761 false, false, /* Prefer 32-bit encodings. */
1762 false, /* Prefer Neon for stringops. */
1763 8, /* Maximum insns to inline memset. */
1764 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1765 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1768 const struct tune_params arm_9e_tune =
1770 arm_9e_rtx_costs,
1771 NULL,
1772 NULL, /* Sched adj cost. */
1773 1, /* Constant limit. */
1774 5, /* Max cond insns. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 true, /* Prefer constant pool. */
1777 arm_default_branch_cost,
1778 false, /* Prefer LDRD/STRD. */
1779 {true, true}, /* Prefer non short circuit. */
1780 &arm_default_vec_cost, /* Vectorizer costs. */
1781 false, /* Prefer Neon for 64-bits bitops. */
1782 false, false, /* Prefer 32-bit encodings. */
1783 false, /* Prefer Neon for stringops. */
1784 8, /* Maximum insns to inline memset. */
1785 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1786 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1789 const struct tune_params arm_v6t2_tune =
1791 arm_9e_rtx_costs,
1792 NULL,
1793 NULL, /* Sched adj cost. */
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 false, /* Prefer constant pool. */
1798 arm_default_branch_cost,
1799 false, /* Prefer LDRD/STRD. */
1800 {true, true}, /* Prefer non short circuit. */
1801 &arm_default_vec_cost, /* Vectorizer costs. */
1802 false, /* Prefer Neon for 64-bits bitops. */
1803 false, false, /* Prefer 32-bit encodings. */
1804 false, /* Prefer Neon for stringops. */
1805 8, /* Maximum insns to inline memset. */
1806 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1807 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1810 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1811 const struct tune_params arm_cortex_tune =
1813 arm_9e_rtx_costs,
1814 &generic_extra_costs,
1815 NULL, /* Sched adj cost. */
1816 1, /* Constant limit. */
1817 5, /* Max cond insns. */
1818 ARM_PREFETCH_NOT_BENEFICIAL,
1819 false, /* Prefer constant pool. */
1820 arm_default_branch_cost,
1821 false, /* Prefer LDRD/STRD. */
1822 {true, true}, /* Prefer non short circuit. */
1823 &arm_default_vec_cost, /* Vectorizer costs. */
1824 false, /* Prefer Neon for 64-bits bitops. */
1825 false, false, /* Prefer 32-bit encodings. */
1826 false, /* Prefer Neon for stringops. */
1827 8, /* Maximum insns to inline memset. */
1828 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1829 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1832 const struct tune_params arm_cortex_a8_tune =
1834 arm_9e_rtx_costs,
1835 &cortexa8_extra_costs,
1836 NULL, /* Sched adj cost. */
1837 1, /* Constant limit. */
1838 5, /* Max cond insns. */
1839 ARM_PREFETCH_NOT_BENEFICIAL,
1840 false, /* Prefer constant pool. */
1841 arm_default_branch_cost,
1842 false, /* Prefer LDRD/STRD. */
1843 {true, true}, /* Prefer non short circuit. */
1844 &arm_default_vec_cost, /* Vectorizer costs. */
1845 false, /* Prefer Neon for 64-bits bitops. */
1846 false, false, /* Prefer 32-bit encodings. */
1847 true, /* Prefer Neon for stringops. */
1848 8, /* Maximum insns to inline memset. */
1849 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1850 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1853 const struct tune_params arm_cortex_a7_tune =
1855 arm_9e_rtx_costs,
1856 &cortexa7_extra_costs,
1857 NULL,
1858 1, /* Constant limit. */
1859 5, /* Max cond insns. */
1860 ARM_PREFETCH_NOT_BENEFICIAL,
1861 false, /* Prefer constant pool. */
1862 arm_default_branch_cost,
1863 false, /* Prefer LDRD/STRD. */
1864 {true, true}, /* Prefer non short circuit. */
1865 &arm_default_vec_cost, /* Vectorizer costs. */
1866 false, /* Prefer Neon for 64-bits bitops. */
1867 false, false, /* Prefer 32-bit encodings. */
1868 true, /* Prefer Neon for stringops. */
1869 8, /* Maximum insns to inline memset. */
1870 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1871 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1874 const struct tune_params arm_cortex_a15_tune =
1876 arm_9e_rtx_costs,
1877 &cortexa15_extra_costs,
1878 NULL, /* Sched adj cost. */
1879 1, /* Constant limit. */
1880 2, /* Max cond insns. */
1881 ARM_PREFETCH_NOT_BENEFICIAL,
1882 false, /* Prefer constant pool. */
1883 arm_default_branch_cost,
1884 true, /* Prefer LDRD/STRD. */
1885 {true, true}, /* Prefer non short circuit. */
1886 &arm_default_vec_cost, /* Vectorizer costs. */
1887 false, /* Prefer Neon for 64-bits bitops. */
1888 true, true, /* Prefer 32-bit encodings. */
1889 true, /* Prefer Neon for stringops. */
1890 8, /* Maximum insns to inline memset. */
1891 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1892 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1895 const struct tune_params arm_cortex_a53_tune =
1897 arm_9e_rtx_costs,
1898 &cortexa53_extra_costs,
1899 NULL, /* Scheduler cost adjustment. */
1900 1, /* Constant limit. */
1901 5, /* Max cond insns. */
1902 ARM_PREFETCH_NOT_BENEFICIAL,
1903 false, /* Prefer constant pool. */
1904 arm_default_branch_cost,
1905 false, /* Prefer LDRD/STRD. */
1906 {true, true}, /* Prefer non short circuit. */
1907 &arm_default_vec_cost, /* Vectorizer costs. */
1908 false, /* Prefer Neon for 64-bits bitops. */
1909 false, false, /* Prefer 32-bit encodings. */
1910 true, /* Prefer Neon for stringops. */
1911 8, /* Maximum insns to inline memset. */
1912 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1913 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1916 const struct tune_params arm_cortex_a57_tune =
1918 arm_9e_rtx_costs,
1919 &cortexa57_extra_costs,
1920 NULL, /* Scheduler cost adjustment. */
1921 1, /* Constant limit. */
1922 2, /* Max cond insns. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 false, /* Prefer constant pool. */
1925 arm_default_branch_cost,
1926 true, /* Prefer LDRD/STRD. */
1927 {true, true}, /* Prefer non short circuit. */
1928 &arm_default_vec_cost, /* Vectorizer costs. */
1929 false, /* Prefer Neon for 64-bits bitops. */
1930 true, true, /* Prefer 32-bit encodings. */
1931 true, /* Prefer Neon for stringops. */
1932 8, /* Maximum insns to inline memset. */
1933 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1934 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1937 const struct tune_params arm_xgene1_tune =
1939 arm_9e_rtx_costs,
1940 &xgene1_extra_costs,
1941 NULL, /* Scheduler cost adjustment. */
1942 1, /* Constant limit. */
1943 2, /* Max cond insns. */
1944 ARM_PREFETCH_NOT_BENEFICIAL,
1945 false, /* Prefer constant pool. */
1946 arm_default_branch_cost,
1947 true, /* Prefer LDRD/STRD. */
1948 {true, true}, /* Prefer non short circuit. */
1949 &arm_default_vec_cost, /* Vectorizer costs. */
1950 false, /* Prefer Neon for 64-bits bitops. */
1951 true, true, /* Prefer 32-bit encodings. */
1952 false, /* Prefer Neon for stringops. */
1953 32, /* Maximum insns to inline memset. */
1954 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1955 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1958 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1959 less appealing. Set max_insns_skipped to a low value. */
1961 const struct tune_params arm_cortex_a5_tune =
1963 arm_9e_rtx_costs,
1964 &cortexa5_extra_costs,
1965 NULL, /* Sched adj cost. */
1966 1, /* Constant limit. */
1967 1, /* Max cond insns. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 false, /* Prefer constant pool. */
1970 arm_cortex_a5_branch_cost,
1971 false, /* Prefer LDRD/STRD. */
1972 {false, false}, /* Prefer non short circuit. */
1973 &arm_default_vec_cost, /* Vectorizer costs. */
1974 false, /* Prefer Neon for 64-bits bitops. */
1975 false, false, /* Prefer 32-bit encodings. */
1976 true, /* Prefer Neon for stringops. */
1977 8, /* Maximum insns to inline memset. */
1978 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1979 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1982 const struct tune_params arm_cortex_a9_tune =
1984 arm_9e_rtx_costs,
1985 &cortexa9_extra_costs,
1986 cortex_a9_sched_adjust_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 ARM_PREFETCH_BENEFICIAL(4,32,32),
1990 false, /* Prefer constant pool. */
1991 arm_default_branch_cost,
1992 false, /* Prefer LDRD/STRD. */
1993 {true, true}, /* Prefer non short circuit. */
1994 &arm_default_vec_cost, /* Vectorizer costs. */
1995 false, /* Prefer Neon for 64-bits bitops. */
1996 false, false, /* Prefer 32-bit encodings. */
1997 false, /* Prefer Neon for stringops. */
1998 8, /* Maximum insns to inline memset. */
1999 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2000 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2003 const struct tune_params arm_cortex_a12_tune =
2005 arm_9e_rtx_costs,
2006 &cortexa12_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 1, /* Constant limit. */
2009 2, /* Max cond insns. */
2010 ARM_PREFETCH_NOT_BENEFICIAL,
2011 false, /* Prefer constant pool. */
2012 arm_default_branch_cost,
2013 true, /* Prefer LDRD/STRD. */
2014 {true, true}, /* Prefer non short circuit. */
2015 &arm_default_vec_cost, /* Vectorizer costs. */
2016 false, /* Prefer Neon for 64-bits bitops. */
2017 true, true, /* Prefer 32-bit encodings. */
2018 true, /* Prefer Neon for stringops. */
2019 8, /* Maximum insns to inline memset. */
2020 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2021 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2024 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
2025 cycle, so the pair takes two. An LDR from the constant pool also takes two cycles
2026 to execute, but mildly increases pipelining opportunity (consecutive
2027 loads/stores can be pipelined together, saving one cycle), and may also
2028 improve icache utilisation. Hence we prefer the constant pool for such
2029 processors. */
2031 const struct tune_params arm_v7m_tune =
2033 arm_9e_rtx_costs,
2034 &v7m_extra_costs,
2035 NULL, /* Sched adj cost. */
2036 1, /* Constant limit. */
2037 2, /* Max cond insns. */
2038 ARM_PREFETCH_NOT_BENEFICIAL,
2039 true, /* Prefer constant pool. */
2040 arm_cortex_m_branch_cost,
2041 false, /* Prefer LDRD/STRD. */
2042 {false, false}, /* Prefer non short circuit. */
2043 &arm_default_vec_cost, /* Vectorizer costs. */
2044 false, /* Prefer Neon for 64-bits bitops. */
2045 false, false, /* Prefer 32-bit encodings. */
2046 false, /* Prefer Neon for stringops. */
2047 8, /* Maximum insns to inline memset. */
2048 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2049 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2052 /* Cortex-M7 tuning. */
2054 const struct tune_params arm_cortex_m7_tune =
2056 arm_9e_rtx_costs,
2057 &v7m_extra_costs,
2058 NULL, /* Sched adj cost. */
2059 0, /* Constant limit. */
2060 1, /* Max cond insns. */
2061 ARM_PREFETCH_NOT_BENEFICIAL,
2062 true, /* Prefer constant pool. */
2063 arm_cortex_m7_branch_cost,
2064 false, /* Prefer LDRD/STRD. */
2065 {true, true}, /* Prefer non short circuit. */
2066 &arm_default_vec_cost, /* Vectorizer costs. */
2067 false, /* Prefer Neon for 64-bits bitops. */
2068 false, false, /* Prefer 32-bit encodings. */
2069 false, /* Prefer Neon for stringops. */
2070 8, /* Maximum insns to inline memset. */
2071 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2072 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2075 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2076 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2077 const struct tune_params arm_v6m_tune =
2079 arm_9e_rtx_costs,
2080 NULL,
2081 NULL, /* Sched adj cost. */
2082 1, /* Constant limit. */
2083 5, /* Max cond insns. */
2084 ARM_PREFETCH_NOT_BENEFICIAL,
2085 false, /* Prefer constant pool. */
2086 arm_default_branch_cost,
2087 false, /* Prefer LDRD/STRD. */
2088 {false, false}, /* Prefer non short circuit. */
2089 &arm_default_vec_cost, /* Vectorizer costs. */
2090 false, /* Prefer Neon for 64-bits bitops. */
2091 false, false, /* Prefer 32-bit encodings. */
2092 false, /* Prefer Neon for stringops. */
2093 8, /* Maximum insns to inline memset. */
2094 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2095 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2098 const struct tune_params arm_fa726te_tune =
2100 arm_9e_rtx_costs,
2101 NULL,
2102 fa726te_sched_adjust_cost,
2103 1, /* Constant limit. */
2104 5, /* Max cond insns. */
2105 ARM_PREFETCH_NOT_BENEFICIAL,
2106 true, /* Prefer constant pool. */
2107 arm_default_branch_cost,
2108 false, /* Prefer LDRD/STRD. */
2109 {true, true}, /* Prefer non short circuit. */
2110 &arm_default_vec_cost, /* Vectorizer costs. */
2111 false, /* Prefer Neon for 64-bits bitops. */
2112 false, false, /* Prefer 32-bit encodings. */
2113 false, /* Prefer Neon for stringops. */
2114 8, /* Maximum insns to inline memset. */
2115 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2116 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2120 /* Not all of these give usefully different compilation alternatives,
2121 but there is no simple way of generalizing them. */
2122 static const struct processors all_cores[] =
2124 /* ARM Cores */
2125 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2126 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2127 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2128 #include "arm-cores.def"
2129 #undef ARM_CORE
2130 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
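/* Editorial note: a sketch of how one arm-cores.def entry expands through
   the ARM_CORE macro above.  The entry shown is illustrative; the exact
   flags used in arm-cores.def may differ.

     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8)

   expands to

     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a8_tune},
*/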
2133 static const struct processors all_architectures[] =
2135 /* ARM Architectures */
2136 /* We don't specify tuning costs here as it will be figured out
2137 from the core. */
2139 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2140 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2141 #include "arm-arches.def"
2142 #undef ARM_ARCH
2143 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2147 /* These are populated as commandline arguments are processed, or NULL
2148 if not specified. */
2149 static const struct processors *arm_selected_arch;
2150 static const struct processors *arm_selected_cpu;
2151 static const struct processors *arm_selected_tune;
2153 /* The name of the preprocessor macro to define for this architecture. */
2155 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2157 /* Available values for -mfpu=. */
2159 static const struct arm_fpu_desc all_fpus[] =
2161 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2162 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2163 #include "arm-fpus.def"
2164 #undef ARM_FPU
2168 /* Supported TLS relocations. */
2170 enum tls_reloc {
2171 TLS_GD32,
2172 TLS_LDM32,
2173 TLS_LDO32,
2174 TLS_IE32,
2175 TLS_LE32,
2176 TLS_DESCSEQ /* GNU scheme */
2179 /* The maximum number of insns to be used when loading a constant. */
2180 inline static int
2181 arm_constant_limit (bool size_p)
2183 return size_p ? 1 : current_tune->constant_limit;
2186 /* Emit an insn that's a simple single-set. Both the operands must be known
2187 to be valid. */
2188 inline static rtx_insn *
2189 emit_set_insn (rtx x, rtx y)
2191 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
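/* Editorial note: a minimal usage sketch of emit_set_insn.  For example

     emit_set_insn (stack_pointer_rtx,
		    plus_constant (Pmode, stack_pointer_rtx, -8));

   emits the single insn (set (reg sp) (plus (reg sp) (const_int -8))),
   on the assumption that both operands are already valid for the mode.  */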
2194 /* Return the number of bits set in VALUE. */
2195 static unsigned
2196 bit_count (unsigned long value)
2198 unsigned long count = 0;
2200 while (value)
2202 count++;
2203 value &= value - 1; /* Clear the least-significant set bit. */
2206 return count;
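/* Editorial note: the loop above clears the least-significant set bit on
   each iteration (Kernighan's trick), so it runs once per set bit; for
   example bit_count (0x2c) == 3.  It computes the same result as
   __builtin_popcountl where that builtin is available.  */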
2209 typedef struct
2211 machine_mode mode;
2212 const char *name;
2213 } arm_fixed_mode_set;
2215 /* A small helper for setting fixed-point library libfuncs. */
2217 static void
2218 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2219 const char *funcname, const char *modename,
2220 int num_suffix)
2222 char buffer[50];
2224 if (num_suffix == 0)
2225 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2226 else
2227 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2229 set_optab_libfunc (optable, mode, buffer);
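/* Editorial note: the sprintf above just pastes the pieces together.  For
   example

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);

   registers "__gnu_addqq3" as the QQmode addition libcall, while a
   num_suffix of 0 would drop the trailing digit.  */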
2232 static void
2233 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2234 machine_mode from, const char *funcname,
2235 const char *toname, const char *fromname)
2237 char buffer[50];
2238 const char *maybe_suffix_2 = "";
2240 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2241 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2242 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2243 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2244 maybe_suffix_2 = "2";
2246 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2247 maybe_suffix_2);
2249 set_conv_libfunc (optable, to, from, buffer);
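/* Editorial note: following the naming scheme above, a conversion from
   SQmode to DQmode via fract_optab is registered as "__gnu_fractsqdq2"
   (both operands are signed fractional modes, so the "2" suffix applies),
   whereas SImode to SAmode is registered as "__gnu_fractsisa" (the integer
   source mode suppresses the suffix).  */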
2252 /* Set up library functions unique to ARM. */
2254 static void
2255 arm_init_libfuncs (void)
2257 /* For Linux, we have access to kernel support for atomic operations. */
2258 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2259 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2261 /* There are no special library functions unless we are using the
2262 ARM BPABI. */
2263 if (!TARGET_BPABI)
2264 return;
2266 /* The functions below are described in Section 4 of the "Run-Time
2267 ABI for the ARM architecture", Version 1.0. */
2269 /* Double-precision floating-point arithmetic. Table 2. */
2270 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2271 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2272 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2273 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2274 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2276 /* Double-precision comparisons. Table 3. */
2277 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2278 set_optab_libfunc (ne_optab, DFmode, NULL);
2279 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2280 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2281 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2282 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2283 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2285 /* Single-precision floating-point arithmetic. Table 4. */
2286 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2287 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2288 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2289 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2290 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2292 /* Single-precision comparisons. Table 5. */
2293 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2294 set_optab_libfunc (ne_optab, SFmode, NULL);
2295 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2296 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2297 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2298 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2299 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2301 /* Floating-point to integer conversions. Table 6. */
2302 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2303 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2304 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2305 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2306 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2307 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2308 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2309 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2311 /* Conversions between floating types. Table 7. */
2312 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2313 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2315 /* Integer to floating-point conversions. Table 8. */
2316 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2317 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2318 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2319 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2320 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2321 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2322 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2323 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2325 /* Long long. Table 9. */
2326 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2327 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2328 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2329 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2330 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2331 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2332 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2333 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2335 /* Integer (32/32->32) division. \S 4.3.1. */
2336 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2337 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2339 /* The divmod functions are designed so that they can be used for
2340 plain division, even though they return both the quotient and the
2341 remainder. The quotient is returned in the usual location (i.e.,
2342 r0 for SImode, {r0, r1} for DImode), just as would be expected
2343 for an ordinary division routine. Because the AAPCS calling
2344 conventions specify that all of { r0, r1, r2, r3 } are
2345 call-clobbered registers, there is no need to tell the compiler
2346 explicitly that those registers are clobbered by these
2347 routines. */
2348 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2349 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2351 /* For SImode division the ABI provides div-without-mod routines,
2352 which are faster. */
2353 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2354 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2356 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2357 divmod libcalls instead. */
2358 set_optab_libfunc (smod_optab, DImode, NULL);
2359 set_optab_libfunc (umod_optab, DImode, NULL);
2360 set_optab_libfunc (smod_optab, SImode, NULL);
2361 set_optab_libfunc (umod_optab, SImode, NULL);
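/* Editorial note: a sketch of the AEABI divmod interface relied on above,
   following the "Run-time ABI for the ARM architecture"; the struct tag
   shown is illustrative:

     typedef struct { int quot; int rem; } idiv_return;
     idiv_return __aeabi_idivmod (int numerator, int denominator);

   For __aeabi_idivmod the quotient comes back in r0 and the remainder in
   r1; the DImode variants return the quotient in {r0, r1}, so
   __aeabi_ldivmod/__aeabi_uldivmod can double as the plain DImode division
   entries, while SImode has the dedicated __aeabi_idiv/__aeabi_uidiv.  */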
2363 /* Half-precision float operations. The compiler handles all operations
2364 with NULL libfuncs by converting to SFmode. */
2365 switch (arm_fp16_format)
2367 case ARM_FP16_FORMAT_IEEE:
2368 case ARM_FP16_FORMAT_ALTERNATIVE:
2370 /* Conversions. */
2371 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2372 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2373 ? "__gnu_f2h_ieee"
2374 : "__gnu_f2h_alternative"));
2375 set_conv_libfunc (sext_optab, SFmode, HFmode,
2376 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2377 ? "__gnu_h2f_ieee"
2378 : "__gnu_h2f_alternative"));
2380 /* Arithmetic. */
2381 set_optab_libfunc (add_optab, HFmode, NULL);
2382 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2383 set_optab_libfunc (smul_optab, HFmode, NULL);
2384 set_optab_libfunc (neg_optab, HFmode, NULL);
2385 set_optab_libfunc (sub_optab, HFmode, NULL);
2387 /* Comparisons. */
2388 set_optab_libfunc (eq_optab, HFmode, NULL);
2389 set_optab_libfunc (ne_optab, HFmode, NULL);
2390 set_optab_libfunc (lt_optab, HFmode, NULL);
2391 set_optab_libfunc (le_optab, HFmode, NULL);
2392 set_optab_libfunc (ge_optab, HFmode, NULL);
2393 set_optab_libfunc (gt_optab, HFmode, NULL);
2394 set_optab_libfunc (unord_optab, HFmode, NULL);
2395 break;
2397 default:
2398 break;
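/* Editorial note: a sketch of what the NULL HFmode entries above mean,
   assuming -mfp16-format=ieee.  Source such as

     __fp16 a, b, c;
     c = a + b;

   is compiled roughly as

     c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));

   so the only HFmode libcalls are the two conversions; the arithmetic and
   comparisons themselves happen in SFmode.  */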
2401 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2403 const arm_fixed_mode_set fixed_arith_modes[] =
2405 { QQmode, "qq" },
2406 { UQQmode, "uqq" },
2407 { HQmode, "hq" },
2408 { UHQmode, "uhq" },
2409 { SQmode, "sq" },
2410 { USQmode, "usq" },
2411 { DQmode, "dq" },
2412 { UDQmode, "udq" },
2413 { TQmode, "tq" },
2414 { UTQmode, "utq" },
2415 { HAmode, "ha" },
2416 { UHAmode, "uha" },
2417 { SAmode, "sa" },
2418 { USAmode, "usa" },
2419 { DAmode, "da" },
2420 { UDAmode, "uda" },
2421 { TAmode, "ta" },
2422 { UTAmode, "uta" }
2424 const arm_fixed_mode_set fixed_conv_modes[] =
2426 { QQmode, "qq" },
2427 { UQQmode, "uqq" },
2428 { HQmode, "hq" },
2429 { UHQmode, "uhq" },
2430 { SQmode, "sq" },
2431 { USQmode, "usq" },
2432 { DQmode, "dq" },
2433 { UDQmode, "udq" },
2434 { TQmode, "tq" },
2435 { UTQmode, "utq" },
2436 { HAmode, "ha" },
2437 { UHAmode, "uha" },
2438 { SAmode, "sa" },
2439 { USAmode, "usa" },
2440 { DAmode, "da" },
2441 { UDAmode, "uda" },
2442 { TAmode, "ta" },
2443 { UTAmode, "uta" },
2444 { QImode, "qi" },
2445 { HImode, "hi" },
2446 { SImode, "si" },
2447 { DImode, "di" },
2448 { TImode, "ti" },
2449 { SFmode, "sf" },
2450 { DFmode, "df" }
2452 unsigned int i, j;
2454 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2456 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2457 "add", fixed_arith_modes[i].name, 3);
2458 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2459 "ssadd", fixed_arith_modes[i].name, 3);
2460 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2461 "usadd", fixed_arith_modes[i].name, 3);
2462 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2463 "sub", fixed_arith_modes[i].name, 3);
2464 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2465 "sssub", fixed_arith_modes[i].name, 3);
2466 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2467 "ussub", fixed_arith_modes[i].name, 3);
2468 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2469 "mul", fixed_arith_modes[i].name, 3);
2470 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2471 "ssmul", fixed_arith_modes[i].name, 3);
2472 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2473 "usmul", fixed_arith_modes[i].name, 3);
2474 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2475 "div", fixed_arith_modes[i].name, 3);
2476 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2477 "udiv", fixed_arith_modes[i].name, 3);
2478 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2479 "ssdiv", fixed_arith_modes[i].name, 3);
2480 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2481 "usdiv", fixed_arith_modes[i].name, 3);
2482 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2483 "neg", fixed_arith_modes[i].name, 2);
2484 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2485 "ssneg", fixed_arith_modes[i].name, 2);
2486 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2487 "usneg", fixed_arith_modes[i].name, 2);
2488 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2489 "ashl", fixed_arith_modes[i].name, 3);
2490 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2491 "ashr", fixed_arith_modes[i].name, 3);
2492 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2493 "lshr", fixed_arith_modes[i].name, 3);
2494 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2495 "ssashl", fixed_arith_modes[i].name, 3);
2496 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2497 "usashl", fixed_arith_modes[i].name, 3);
2498 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2499 "cmp", fixed_arith_modes[i].name, 2);
2502 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2503 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2505 if (i == j
2506 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2507 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2508 continue;
2510 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2511 fixed_conv_modes[j].mode, "fract",
2512 fixed_conv_modes[i].name,
2513 fixed_conv_modes[j].name);
2514 arm_set_fixed_conv_libfunc (satfract_optab,
2515 fixed_conv_modes[i].mode,
2516 fixed_conv_modes[j].mode, "satfract",
2517 fixed_conv_modes[i].name,
2518 fixed_conv_modes[j].name);
2519 arm_set_fixed_conv_libfunc (fractuns_optab,
2520 fixed_conv_modes[i].mode,
2521 fixed_conv_modes[j].mode, "fractuns",
2522 fixed_conv_modes[i].name,
2523 fixed_conv_modes[j].name);
2524 arm_set_fixed_conv_libfunc (satfractuns_optab,
2525 fixed_conv_modes[i].mode,
2526 fixed_conv_modes[j].mode, "satfractuns",
2527 fixed_conv_modes[i].name,
2528 fixed_conv_modes[j].name);
2532 if (TARGET_AAPCS_BASED)
2533 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2536 /* On AAPCS systems, this is the "struct __va_list". */
2537 static GTY(()) tree va_list_type;
2539 /* Return the type to use as __builtin_va_list. */
2540 static tree
2541 arm_build_builtin_va_list (void)
2543 tree va_list_name;
2544 tree ap_field;
2546 if (!TARGET_AAPCS_BASED)
2547 return std_build_builtin_va_list ();
2549 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2550 defined as:
2552 struct __va_list
2554 void *__ap;
2557 The C Library ABI further reinforces this definition in \S
2558 4.1.
2560 We must follow this definition exactly. The structure tag
2561 name is visible in C++ mangled names, and thus forms a part
2562 of the ABI. The field name may be used by people who
2563 #include <stdarg.h>. */
2564 /* Create the type. */
2565 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2566 /* Give it the required name. */
2567 va_list_name = build_decl (BUILTINS_LOCATION,
2568 TYPE_DECL,
2569 get_identifier ("__va_list"),
2570 va_list_type);
2571 DECL_ARTIFICIAL (va_list_name) = 1;
2572 TYPE_NAME (va_list_type) = va_list_name;
2573 TYPE_STUB_DECL (va_list_type) = va_list_name;
2574 /* Create the __ap field. */
2575 ap_field = build_decl (BUILTINS_LOCATION,
2576 FIELD_DECL,
2577 get_identifier ("__ap"),
2578 ptr_type_node);
2579 DECL_ARTIFICIAL (ap_field) = 1;
2580 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2581 TYPE_FIELDS (va_list_type) = ap_field;
2582 /* Compute its layout. */
2583 layout_type (va_list_type);
2585 return va_list_type;
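/* Editorial note: the type built above corresponds to this C-level
   declaration, which is what AAPCS-conforming <stdarg.h> users see:

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   The tag "__va_list" is significant because it appears in C++ mangled
   names, as the comment above explains.  */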
2588 /* Return an expression of type "void *" pointing to the next
2589 available argument in a variable-argument list. VALIST is the
2590 user-level va_list object, of type __builtin_va_list. */
2591 static tree
2592 arm_extract_valist_ptr (tree valist)
2594 if (TREE_TYPE (valist) == error_mark_node)
2595 return error_mark_node;
2597 /* On an AAPCS target, the pointer is stored within "struct
2598 va_list". */
2599 if (TARGET_AAPCS_BASED)
2601 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2602 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2603 valist, ap_field, NULL_TREE);
2606 return valist;
2609 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2610 static void
2611 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2613 valist = arm_extract_valist_ptr (valist);
2614 std_expand_builtin_va_start (valist, nextarg);
2617 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2618 static tree
2619 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2620 gimple_seq *post_p)
2622 valist = arm_extract_valist_ptr (valist);
2623 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2626 /* Fix up any incompatible options that the user has specified. */
2627 static void
2628 arm_option_override (void)
2630 arm_selected_arch = NULL;
2631 arm_selected_cpu = NULL;
2632 arm_selected_tune = NULL;
2634 if (global_options_set.x_arm_arch_option)
2635 arm_selected_arch = &all_architectures[arm_arch_option];
2637 if (global_options_set.x_arm_cpu_option)
2639 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2640 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2643 if (global_options_set.x_arm_tune_option)
2644 arm_selected_tune = &all_cores[(int) arm_tune_option];
2646 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2647 SUBTARGET_OVERRIDE_OPTIONS;
2648 #endif
2650 if (arm_selected_arch)
2652 if (arm_selected_cpu)
2654 /* Check for conflict between mcpu and march. */
2655 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2657 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2658 arm_selected_cpu->name, arm_selected_arch->name);
2659 /* -march wins for code generation.
2660 -mcpu wins for default tuning. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = arm_selected_cpu;
2664 arm_selected_cpu = arm_selected_arch;
2666 else
2667 /* -mcpu wins. */
2668 arm_selected_arch = NULL;
2670 else
2671 /* Pick a CPU based on the architecture. */
2672 arm_selected_cpu = arm_selected_arch;
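/* Editorial note: an illustrative invocation of the conflict handling
   above (the driver name and option values are hypothetical):

     arm-none-eabi-gcc -mcpu=cortex-m3 -march=armv7-a ...

   warns "switch -mcpu=cortex-m3 conflicts with -march=armv7-a switch",
   then generates armv7-a code while tuning defaults to the Cortex-M3
   entry, unless -mtune= was given explicitly.  */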
2675 /* If the user did not specify a processor, choose one for them. */
2676 if (!arm_selected_cpu)
2678 const struct processors * sel;
2679 unsigned int sought;
2681 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2682 if (!arm_selected_cpu->name)
2684 #ifdef SUBTARGET_CPU_DEFAULT
2685 /* Use the subtarget default CPU if none was specified by
2686 configure. */
2687 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2688 #endif
2689 /* Default to ARM6. */
2690 if (!arm_selected_cpu->name)
2691 arm_selected_cpu = &all_cores[arm6];
2694 sel = arm_selected_cpu;
2695 insn_flags = sel->flags;
2697 /* Now check to see if the user has specified some command line
2698 switch that requires certain abilities from the cpu. */
2699 sought = 0;
2701 if (TARGET_INTERWORK || TARGET_THUMB)
2703 sought |= (FL_THUMB | FL_MODE32);
2705 /* There are no ARM processors that support both APCS-26 and
2706 interworking. Therefore we force FL_MODE26 to be removed
2707 from insn_flags here (if it was set), so that the search
2708 below will always be able to find a compatible processor. */
2709 insn_flags &= ~FL_MODE26;
2712 if (sought != 0 && ((sought & insn_flags) != sought))
2714 /* Try to locate a CPU type that supports all of the abilities
2715 of the default CPU, plus the extra abilities requested by
2716 the user. */
2717 for (sel = all_cores; sel->name != NULL; sel++)
2718 if ((sel->flags & sought) == (sought | insn_flags))
2719 break;
2721 if (sel->name == NULL)
2723 unsigned current_bit_count = 0;
2724 const struct processors * best_fit = NULL;
2726 /* Ideally we would like to issue an error message here
2727 saying that it was not possible to find a CPU compatible
2728 with the default CPU, but which also supports the command
2729 line options specified by the programmer, and so they
2730 ought to use the -mcpu=<name> command line option to
2731 override the default CPU type.
2733 If we cannot find a cpu that has both the
2734 characteristics of the default cpu and the given
2735 command line options we scan the array again looking
2736 for a best match. */
2737 for (sel = all_cores; sel->name != NULL; sel++)
2738 if ((sel->flags & sought) == sought)
2740 unsigned count;
2742 count = bit_count (sel->flags & insn_flags);
2744 if (count >= current_bit_count)
2746 best_fit = sel;
2747 current_bit_count = count;
2751 gcc_assert (best_fit);
2752 sel = best_fit;
2755 arm_selected_cpu = sel;
2759 gcc_assert (arm_selected_cpu);
2760 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2761 if (!arm_selected_tune)
2762 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2764 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2765 insn_flags = arm_selected_cpu->flags;
2766 arm_base_arch = arm_selected_cpu->base_arch;
2768 arm_tune = arm_selected_tune->core;
2769 tune_flags = arm_selected_tune->flags;
2770 current_tune = arm_selected_tune->tune;
2772 /* Make sure that the processor choice does not conflict with any of the
2773 other command line choices. */
2774 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2775 error ("target CPU does not support ARM mode");
2777 /* BPABI targets use linker tricks to allow interworking on cores
2778 without thumb support. */
2779 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2781 warning (0, "target CPU does not support interworking" );
2782 target_flags &= ~MASK_INTERWORK;
2785 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2787 warning (0, "target CPU does not support THUMB instructions");
2788 target_flags &= ~MASK_THUMB;
2791 if (TARGET_APCS_FRAME && TARGET_THUMB)
2793 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2794 target_flags &= ~MASK_APCS_FRAME;
2797 /* Callee super interworking implies thumb interworking. Adding
2798 this to the flags here simplifies the logic elsewhere. */
2799 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2800 target_flags |= MASK_INTERWORK;
2802 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2803 from here where no function is being compiled currently. */
2804 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2805 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2807 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2808 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2810 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2812 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2813 target_flags |= MASK_APCS_FRAME;
2816 if (TARGET_POKE_FUNCTION_NAME)
2817 target_flags |= MASK_APCS_FRAME;
2819 if (TARGET_APCS_REENT && flag_pic)
2820 error ("-fpic and -mapcs-reent are incompatible");
2822 if (TARGET_APCS_REENT)
2823 warning (0, "APCS reentrant code not supported. Ignored");
2825 /* If this target is normally configured to use APCS frames, warn if they
2826 are turned off and debugging is turned on. */
2827 if (TARGET_ARM
2828 && write_symbols != NO_DEBUG
2829 && !TARGET_APCS_FRAME
2830 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2831 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2833 if (TARGET_APCS_FLOAT)
2834 warning (0, "passing floating point arguments in fp regs not yet supported");
2836 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2837 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2838 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2839 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2840 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2841 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2842 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2843 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2844 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2845 arm_arch6m = arm_arch6 && !arm_arch_notm;
2846 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2847 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2848 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2849 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2850 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2852 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2853 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2854 thumb_code = TARGET_ARM == 0;
2855 thumb1_code = TARGET_THUMB1 != 0;
2856 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2857 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2858 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2859 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2860 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2861 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2862 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2863 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2864 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2865 if (arm_restrict_it == 2)
2866 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2868 if (!TARGET_THUMB2)
2869 arm_restrict_it = 0;
2871 /* If we are not using the default (ARM mode) section anchor offset
2872 ranges, then set the correct ranges now. */
2873 if (TARGET_THUMB1)
2875 /* Thumb-1 LDR instructions cannot have negative offsets.
2876 Permissible positive offset ranges are 5-bit (for byte loads),
2877 6-bit (for halfword loads), or 7-bit (for word loads).
2878 Empirical results suggest a 7-bit anchor range gives the best
2879 overall code size. */
2880 targetm.min_anchor_offset = 0;
2881 targetm.max_anchor_offset = 127;
2883 else if (TARGET_THUMB2)
2885 /* The minimum is set such that the total size of the block
2886 for a particular anchor is 248 + 1 + 4095 bytes, which is
2887 divisible by eight, ensuring natural spacing of anchors. */
2888 targetm.min_anchor_offset = -248;
2889 targetm.max_anchor_offset = 4095;
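/* Editorial note: with the Thumb-2 values above the anchored range spans
   -248 .. 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so
   the block size is indeed divisible by eight as the comment requires.  */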
2892 /* V5 code we generate is completely interworking capable, so we turn off
2893 TARGET_INTERWORK here to avoid many tests later on. */
2895 /* XXX However, we must pass the right pre-processor defines to CPP
2896 or GLD can get confused. This is a hack. */
2897 if (TARGET_INTERWORK)
2898 arm_cpp_interwork = 1;
2900 if (arm_arch5)
2901 target_flags &= ~MASK_INTERWORK;
2903 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2904 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2906 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2907 error ("iwmmxt abi requires an iwmmxt capable cpu");
2909 if (!global_options_set.x_arm_fpu_index)
2911 const char *target_fpu_name;
2912 bool ok;
2914 #ifdef FPUTYPE_DEFAULT
2915 target_fpu_name = FPUTYPE_DEFAULT;
2916 #else
2917 target_fpu_name = "vfp";
2918 #endif
2920 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2921 CL_TARGET);
2922 gcc_assert (ok);
2925 arm_fpu_desc = &all_fpus[arm_fpu_index];
2927 switch (arm_fpu_desc->model)
2929 case ARM_FP_MODEL_VFP:
2930 arm_fpu_attr = FPU_VFP;
2931 break;
2933 default:
2934 gcc_unreachable();
2937 if (TARGET_AAPCS_BASED)
2939 if (TARGET_CALLER_INTERWORKING)
2940 error ("AAPCS does not support -mcaller-super-interworking");
2941 else
2942 if (TARGET_CALLEE_INTERWORKING)
2943 error ("AAPCS does not support -mcallee-super-interworking");
2946 /* iWMMXt and NEON are incompatible. */
2947 if (TARGET_IWMMXT && TARGET_NEON)
2948 error ("iWMMXt and NEON are incompatible");
2950 /* iWMMXt unsupported under Thumb mode. */
2951 if (TARGET_THUMB && TARGET_IWMMXT)
2952 error ("iWMMXt unsupported under Thumb mode");
2954 /* __fp16 support currently assumes the core has ldrh. */
2955 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2956 sorry ("__fp16 and no ldrh");
2958 /* If soft-float is specified then don't use FPU. */
2959 if (TARGET_SOFT_FLOAT)
2960 arm_fpu_attr = FPU_NONE;
2962 if (TARGET_AAPCS_BASED)
2964 if (arm_abi == ARM_ABI_IWMMXT)
2965 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2966 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2967 && TARGET_HARD_FLOAT
2968 && TARGET_VFP)
2969 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2970 else
2971 arm_pcs_default = ARM_PCS_AAPCS;
2973 else
2975 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2976 sorry ("-mfloat-abi=hard and VFP");
2978 if (arm_abi == ARM_ABI_APCS)
2979 arm_pcs_default = ARM_PCS_APCS;
2980 else
2981 arm_pcs_default = ARM_PCS_ATPCS;
2984 /* For arm2/3 there is no need to do any scheduling if we are doing
2985 software floating-point. */
2986 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2987 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2989 /* Use the cp15 method if it is available. */
2990 if (target_thread_pointer == TP_AUTO)
2992 if (arm_arch6k && !TARGET_THUMB1)
2993 target_thread_pointer = TP_CP15;
2994 else
2995 target_thread_pointer = TP_SOFT;
2998 if (TARGET_HARD_TP && TARGET_THUMB1)
2999 error ("can not use -mtp=cp15 with 16-bit Thumb");
3001 /* Override the default structure alignment for AAPCS ABI. */
3002 if (!global_options_set.x_arm_structure_size_boundary)
3004 if (TARGET_AAPCS_BASED)
3005 arm_structure_size_boundary = 8;
3007 else
3009 if (arm_structure_size_boundary != 8
3010 && arm_structure_size_boundary != 32
3011 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3013 if (ARM_DOUBLEWORD_ALIGN)
3014 warning (0,
3015 "structure size boundary can only be set to 8, 32 or 64");
3016 else
3017 warning (0, "structure size boundary can only be set to 8 or 32");
3018 arm_structure_size_boundary
3019 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3023 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3025 error ("RTP PIC is incompatible with Thumb");
3026 flag_pic = 0;
3029 /* If stack checking is disabled, we can use r10 as the PIC register,
3030 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3031 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3033 if (TARGET_VXWORKS_RTP)
3034 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3035 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3038 if (flag_pic && TARGET_VXWORKS_RTP)
3039 arm_pic_register = 9;
3041 if (arm_pic_register_string != NULL)
3043 int pic_register = decode_reg_name (arm_pic_register_string);
3045 if (!flag_pic)
3046 warning (0, "-mpic-register= is useless without -fpic");
3048 /* Prevent the user from choosing an obviously stupid PIC register. */
3049 else if (pic_register < 0 || call_used_regs[pic_register]
3050 || pic_register == HARD_FRAME_POINTER_REGNUM
3051 || pic_register == STACK_POINTER_REGNUM
3052 || pic_register >= PC_REGNUM
3053 || (TARGET_VXWORKS_RTP
3054 && (unsigned int) pic_register != arm_pic_register))
3055 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3056 else
3057 arm_pic_register = pic_register;
3060 if (TARGET_VXWORKS_RTP
3061 && !global_options_set.x_arm_pic_data_is_text_relative)
3062 arm_pic_data_is_text_relative = 0;
3064 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3065 if (fix_cm3_ldrd == 2)
3067 if (arm_selected_cpu->core == cortexm3)
3068 fix_cm3_ldrd = 1;
3069 else
3070 fix_cm3_ldrd = 0;
3073 /* Enable -munaligned-access by default for
3074 - all ARMv6 architecture-based processors
3075 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3076 - ARMv8 architecture-based processors.
3078 Disable -munaligned-access by default for
3079 - all pre-ARMv6 architecture-based processors
3080 - ARMv6-M architecture-based processors. */
3082 if (unaligned_access == 2)
3084 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3085 unaligned_access = 1;
3086 else
3087 unaligned_access = 0;
3089 else if (unaligned_access == 1
3090 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3092 warning (0, "target CPU does not support unaligned accesses");
3093 unaligned_access = 0;
3096 if (TARGET_THUMB1 && flag_schedule_insns)
3098 /* Don't warn since it's on by default in -O2. */
3099 flag_schedule_insns = 0;
3102 if (optimize_size)
3104 /* If optimizing for size, bump the number of instructions that we
3105 are prepared to conditionally execute (even on a StrongARM). */
3106 max_insns_skipped = 6;
3108 /* For THUMB2, we limit the conditional sequence to one IT block. */
3109 if (TARGET_THUMB2)
3110 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3112 else
3113 max_insns_skipped = current_tune->max_insns_skipped;
3115 /* Hot/Cold partitioning is not currently supported, since we can't
3116 handle literal pool placement in that case. */
3117 if (flag_reorder_blocks_and_partition)
3119 inform (input_location,
3120 "-freorder-blocks-and-partition not supported on this architecture");
3121 flag_reorder_blocks_and_partition = 0;
3122 flag_reorder_blocks = 1;
3125 if (flag_pic)
3126 /* Hoisting PIC address calculations more aggressively provides a small,
3127 but measurable, size reduction for PIC code. Therefore, we decrease
3128 the bar for unrestricted expression hoisting to the cost of PIC address
3129 calculation, which is 2 instructions. */
3130 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3131 global_options.x_param_values,
3132 global_options_set.x_param_values);
3134 /* ARM EABI defaults to strict volatile bitfields. */
3135 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3136 && abi_version_at_least(2))
3137 flag_strict_volatile_bitfields = 1;
3139 /* Enable software prefetching at -O3 for CPUs that have prefetch, and for which
3140 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3141 if (flag_prefetch_loop_arrays < 0
3142 && HAVE_prefetch
3143 && optimize >= 3
3144 && current_tune->num_prefetch_slots > 0)
3145 flag_prefetch_loop_arrays = 1;
3147 /* Set up parameters to be used in the prefetching algorithm. Do not override the
3148 defaults unless we are tuning for a core we have researched values for. */
3149 if (current_tune->num_prefetch_slots > 0)
3150 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3151 current_tune->num_prefetch_slots,
3152 global_options.x_param_values,
3153 global_options_set.x_param_values);
3154 if (current_tune->l1_cache_line_size >= 0)
3155 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3156 current_tune->l1_cache_line_size,
3157 global_options.x_param_values,
3158 global_options_set.x_param_values);
3159 if (current_tune->l1_cache_size >= 0)
3160 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3161 current_tune->l1_cache_size,
3162 global_options.x_param_values,
3163 global_options_set.x_param_values);
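/* Editorial note: the three parameters above map onto user-visible --param
   knobs, so a researched tuning such as ARM_PREFETCH_BENEFICIAL(4,32,32)
   on the Cortex-A9 could be approximated by hand with something like
   (values illustrative):

     gcc -O3 -mtune=cortex-a9 --param simultaneous-prefetches=4 \
	 --param l1-cache-line-size=32 --param l1-cache-size=32 ...

   Explicit --param settings on the command line are not overridden here,
   since maybe_set_param_value only adjusts the defaults.  */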
3165 /* Use Neon rather than core registers to perform 64-bit
3166 operations. */
3167 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3168 if (use_neon_for_64bits == 1)
3169 prefer_neon_for_64bits = true;
3171 /* Use the alternative scheduling-pressure algorithm by default. */
3172 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3173 global_options.x_param_values,
3174 global_options_set.x_param_values);
3176 /* Look through ready list and all of queue for instructions
3177 relevant for L2 auto-prefetcher. */
3178 int param_sched_autopref_queue_depth;
3179 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3180 param_sched_autopref_queue_depth = -1;
3181 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3182 param_sched_autopref_queue_depth = 0;
3183 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3184 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3185 else
3186 gcc_unreachable ();
3187 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3188 param_sched_autopref_queue_depth,
3189 global_options.x_param_values,
3190 global_options_set.x_param_values);
3192 /* Disable shrink-wrap when optimizing function for size, since it tends to
3193 generate additional returns. */
3194 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3195 flag_shrink_wrap = false;
3196 /* TBD: Dwarf info for apcs frame is not handled yet. */
3197 if (TARGET_APCS_FRAME)
3198 flag_shrink_wrap = false;
3200 /* We only support -mslow-flash-data on armv7-m targets. */
3201 if (target_slow_flash_data
3202 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3203 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3204 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3206 /* Currently, for slow flash data, we just disable literal pools. */
3207 if (target_slow_flash_data)
3208 arm_disable_literal_pool = true;
3210 /* Thumb2 inline assembly code should always use unified syntax.
3211 This will apply to ARM and Thumb1 eventually. */
3212 if (TARGET_THUMB2)
3213 inline_asm_unified = 1;
3215 /* Disable scheduling fusion by default if the target is not an armv7
3216 processor or does not prefer ldrd/strd. */
3217 if (flag_schedule_fusion == 2
3218 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3219 flag_schedule_fusion = 0;
3221 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3222 - epilogue_insns - does not accurately model the corresponding insns
3223 emitted in the asm file. In particular, see the comment in thumb_exit
3224 'Find out how many of the (return) argument registers we can corrupt'.
3225 As a consequence, the epilogue may clobber registers without fipa-ra
3226 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3227 TODO: Accurately model clobbers for epilogue_insns and reenable
3228 fipa-ra. */
3229 if (TARGET_THUMB1)
3230 flag_ipa_ra = 0;
3232 /* Register global variables with the garbage collector. */
3233 arm_add_gc_roots ();
3236 static void
3237 arm_add_gc_roots (void)
3239 gcc_obstack_init(&minipool_obstack);
3240 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3243 /* A table of known ARM exception types.
3244 For use with the interrupt function attribute. */
3246 typedef struct
3248 const char *const arg;
3249 const unsigned long return_value;
3251 isr_attribute_arg;
3253 static const isr_attribute_arg isr_attribute_args [] =
3255 { "IRQ", ARM_FT_ISR },
3256 { "irq", ARM_FT_ISR },
3257 { "FIQ", ARM_FT_FIQ },
3258 { "fiq", ARM_FT_FIQ },
3259 { "ABORT", ARM_FT_ISR },
3260 { "abort", ARM_FT_ISR },
3261 { "ABORT", ARM_FT_ISR },
3262 { "abort", ARM_FT_ISR },
3263 { "UNDEF", ARM_FT_EXCEPTION },
3264 { "undef", ARM_FT_EXCEPTION },
3265 { "SWI", ARM_FT_EXCEPTION },
3266 { "swi", ARM_FT_EXCEPTION },
3267 { NULL, ARM_FT_NORMAL }
3270 /* Returns the (interrupt) function type of the current
3271 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3273 static unsigned long
3274 arm_isr_value (tree argument)
3276 const isr_attribute_arg * ptr;
3277 const char * arg;
3279 if (!arm_arch_notm)
3280 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3282 /* No argument - default to IRQ. */
3283 if (argument == NULL_TREE)
3284 return ARM_FT_ISR;
3286 /* Get the value of the argument. */
3287 if (TREE_VALUE (argument) == NULL_TREE
3288 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3289 return ARM_FT_UNKNOWN;
3291 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3293 /* Check it against the list of known arguments. */
3294 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3295 if (streq (arg, ptr->arg))
3296 return ptr->return_value;
3298 /* An unrecognized interrupt type. */
3299 return ARM_FT_UNKNOWN;
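/* Editorial note: the strings in isr_attribute_args above are the values
   accepted by the ARM interrupt/isr function attribute, e.g.

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   Omitting the argument defaults to ARM_FT_ISR, as handled at the top of
   arm_isr_value.  */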
3302 /* Computes the type of the current function. */
3304 static unsigned long
3305 arm_compute_func_type (void)
3307 unsigned long type = ARM_FT_UNKNOWN;
3308 tree a;
3309 tree attr;
3311 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3313 /* Decide if the current function is volatile. Such functions
3314 never return, and many memory cycles can be saved by not storing
3315 register values that will never be needed again. This optimization
3316 was added to speed up context switching in a kernel application. */
3317 if (optimize > 0
3318 && (TREE_NOTHROW (current_function_decl)
3319 || !(flag_unwind_tables
3320 || (flag_exceptions
3321 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3322 && TREE_THIS_VOLATILE (current_function_decl))
3323 type |= ARM_FT_VOLATILE;
3325 if (cfun->static_chain_decl != NULL)
3326 type |= ARM_FT_NESTED;
3328 attr = DECL_ATTRIBUTES (current_function_decl);
3330 a = lookup_attribute ("naked", attr);
3331 if (a != NULL_TREE)
3332 type |= ARM_FT_NAKED;
3334 a = lookup_attribute ("isr", attr);
3335 if (a == NULL_TREE)
3336 a = lookup_attribute ("interrupt", attr);
3338 if (a == NULL_TREE)
3339 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3340 else
3341 type |= arm_isr_value (TREE_VALUE (a));
3343 return type;
3346 /* Returns the type of the current function. */
3348 unsigned long
3349 arm_current_func_type (void)
3351 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3352 cfun->machine->func_type = arm_compute_func_type ();
3354 return cfun->machine->func_type;
3357 bool
3358 arm_allocate_stack_slots_for_args (void)
3360 /* Naked functions should not allocate stack slots for arguments. */
3361 return !IS_NAKED (arm_current_func_type ());
3364 static bool
3365 arm_warn_func_return (tree decl)
3367 /* Naked functions are implemented entirely in assembly, including the
3368 return sequence, so suppress warnings about this. */
3369 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3373 /* Output assembler code for a block containing the constant parts
3374 of a trampoline, leaving space for the variable parts.
3376 On the ARM, (if r8 is the static chain regnum, and remembering that
3377 referencing pc adds an offset of 8) the trampoline looks like:
3378 ldr r8, [pc, #0]
3379 ldr pc, [pc]
3380 .word static chain value
3381 .word function's address
3382 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3384 static void
3385 arm_asm_trampoline_template (FILE *f)
3387 if (TARGET_ARM)
3389 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3390 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3392 else if (TARGET_THUMB2)
3394 /* The Thumb-2 trampoline is similar to the ARM implementation.
3395 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3396 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3397 STATIC_CHAIN_REGNUM, PC_REGNUM);
3398 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3400 else
3402 ASM_OUTPUT_ALIGN (f, 2);
3403 fprintf (f, "\t.code\t16\n");
3404 fprintf (f, ".Ltrampoline_start:\n");
3405 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3406 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3407 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3408 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3409 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3410 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3412 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3413 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3416 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3418 static void
3419 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3421 rtx fnaddr, mem, a_tramp;
3423 emit_block_move (m_tramp, assemble_trampoline_template (),
3424 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3426 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3427 emit_move_insn (mem, chain_value);
3429 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3430 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3431 emit_move_insn (mem, fnaddr);
3433 a_tramp = XEXP (m_tramp, 0);
3434 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3435 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3436 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3439 /* Thumb trampolines should be entered in thumb mode, so set
3440 the bottom bit of the address. */
3442 static rtx
3443 arm_trampoline_adjust_address (rtx addr)
3445 if (TARGET_THUMB)
3446 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3447 NULL, 0, OPTAB_LIB_WIDEN);
3448 return addr;
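/* For illustration, the 32-bit (ARM/Thumb-2) trampoline produced by the
   routines above is laid out roughly as:

       offset 0:  ldr  <chain reg>, [pc, ...]   ; load static chain value
       offset 4:  ldr  pc, [pc, ...]            ; jump to target function
       offset 8:  .word <static chain value>    ; written by
       offset 12: .word <function address>      ; arm_trampoline_init

   arm_trampoline_init stores the chain and address at offsets 8 and 12
   (12 and 16 for the 16-bit Thumb stub) and then calls __clear_cache
   over the block so the freshly written words are visible as
   instructions.  */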
3451 /* Return 1 if it is possible to return using a single instruction.
3452 If SIBLING is non-null, this is a test for a return before a sibling
3453 call. SIBLING is the call insn, so we can examine its register usage. */
3456 use_return_insn (int iscond, rtx sibling)
3458 int regno;
3459 unsigned int func_type;
3460 unsigned long saved_int_regs;
3461 unsigned HOST_WIDE_INT stack_adjust;
3462 arm_stack_offsets *offsets;
3464 /* Never use a return instruction before reload has run. */
3465 if (!reload_completed)
3466 return 0;
3468 func_type = arm_current_func_type ();
3470 /* Naked, volatile and stack alignment functions need special
3471 consideration. */
3472 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3473 return 0;
3475 /* So do interrupt functions that use the frame pointer and Thumb
3476 interrupt functions. */
3477 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3478 return 0;
3480 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3481 && !optimize_function_for_size_p (cfun))
3482 return 0;
3484 offsets = arm_get_frame_offsets ();
3485 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3487 /* As do variadic functions. */
3488 if (crtl->args.pretend_args_size
3489 || cfun->machine->uses_anonymous_args
3490 /* Or if the function calls __builtin_eh_return () */
3491 || crtl->calls_eh_return
3492 /* Or if the function calls alloca */
3493 || cfun->calls_alloca
3494 /* Or if there is a stack adjustment. However, if the stack pointer
3495 is saved on the stack, we can use a pre-incrementing stack load. */
3496 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3497 && stack_adjust == 4)))
3498 return 0;
3500 saved_int_regs = offsets->saved_regs_mask;
3502 /* Unfortunately, the insn
3504 ldmib sp, {..., sp, ...}
3506 triggers a bug on most SA-110 based devices, such that the stack
3507 pointer won't be correctly restored if the instruction takes a
3508 page fault. We work around this problem by popping r3 along with
3509 the other registers, since that is never slower than executing
3510 another instruction.
3512 We test for !arm_arch5 here, because code for any architecture
3513 less than this could potentially be run on one of the buggy
3514 chips. */
3515 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3517 /* Validate that r3 is a call-clobbered register (always true in
3518 the default abi) ... */
3519 if (!call_used_regs[3])
3520 return 0;
3522 /* ... that it isn't being used for a return value ... */
3523 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3524 return 0;
3526 /* ... or for a tail-call argument ... */
3527 if (sibling)
3529 gcc_assert (CALL_P (sibling));
3531 if (find_regno_fusage (sibling, USE, 3))
3532 return 0;
3535 /* ... and that there are no call-saved registers in r0-r2
3536 (always true in the default ABI). */
3537 if (saved_int_regs & 0x7)
3538 return 0;
3541 /* Can't be done if interworking with Thumb, and any registers have been
3542 stacked. */
3543 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3544 return 0;
3546 /* On StrongARM, conditional returns are expensive if they aren't
3547 taken and multiple registers have been stacked. */
3548 if (iscond && arm_tune_strongarm)
3550 /* Conditional return when just the LR is stored is a simple
3551 conditional-load instruction, that's not expensive. */
3552 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3553 return 0;
3555 if (flag_pic
3556 && arm_pic_register != INVALID_REGNUM
3557 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3558 return 0;
3561 /* If there are saved registers but the LR isn't saved, then we need
3562 two instructions for the return. */
3563 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3564 return 0;
3566 /* Can't be done if any of the VFP regs are pushed,
3567 since this also requires an insn. */
3568 if (TARGET_HARD_FLOAT && TARGET_VFP)
3569 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3570 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3571 return 0;
3573 if (TARGET_REALLY_IWMMXT)
3574 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3575 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3576 return 0;
3578 return 1;
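/* Roughly speaking, a "single instruction" return is either a plain
   return such as "bx lr" (nothing saved) or a load-multiple that
   restores the saved registers and the PC in one go, e.g.

       ldmfd   sp!, {r4-r7, pc}

   The checks above reject the cases (interrupt handlers needing a frame
   pointer, stack adjustments, interworking, live VFP or iWMMXt
   registers, ...) where one instruction would not be enough.  */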
3581 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3582 shrink-wrapping if possible. This is the case if we need to emit a
3583 prologue, which we can test by looking at the offsets. */
3584 bool
3585 use_simple_return_p (void)
3587 arm_stack_offsets *offsets;
3589 offsets = arm_get_frame_offsets ();
3590 return offsets->outgoing_args != 0;
3593 /* Return TRUE if int I is a valid immediate ARM constant. */
3596 const_ok_for_arm (HOST_WIDE_INT i)
3598 int lowbit;
3600 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3601 be all zero, or all one. */
3602 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3603 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3604 != ((~(unsigned HOST_WIDE_INT) 0)
3605 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3606 return FALSE;
3608 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3610 /* Fast return for 0 and small values. We must do this for zero, since
3611 the code below can't handle that one case. */
3612 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3613 return TRUE;
3615 /* Get the number of trailing zeros. */
3616 lowbit = ffs((int) i) - 1;
3618 /* Only even shifts are allowed in ARM mode so round down to the
3619 nearest even number. */
3620 if (TARGET_ARM)
3621 lowbit &= ~1;
3623 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3624 return TRUE;
3626 if (TARGET_ARM)
3628 /* Allow rotated constants in ARM mode. */
3629 if (lowbit <= 4
3630 && ((i & ~0xc000003f) == 0
3631 || (i & ~0xf000000f) == 0
3632 || (i & ~0xfc000003) == 0))
3633 return TRUE;
3635 else
3637 HOST_WIDE_INT v;
3639 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3640 v = i & 0xff;
3641 v |= v << 16;
3642 if (i == v || i == (v | (v << 8)))
3643 return TRUE;
3645 /* Allow repeated pattern 0xXY00XY00. */
3646 v = i & 0xff00;
3647 v |= v << 16;
3648 if (i == v)
3649 return TRUE;
3652 return FALSE;
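/* For illustration: 0xff, 0xff000000 and 0x3fc00 (0xff << 10) are all
   valid ARM immediates, since each is an 8-bit value rotated right by an
   even amount.  0x102 is rejected in ARM mode (its 8-bit window would
   have to start at an odd bit position) but accepted in Thumb-2, which
   additionally accepts the replicated forms handled above, such as
   0x00ab00ab and 0xabababab.  */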
3655 /* Return true if I is a valid constant for the operation CODE. */
3657 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3659 if (const_ok_for_arm (i))
3660 return 1;
3662 switch (code)
3664 case SET:
3665 /* See if we can use movw. */
3666 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3667 return 1;
3668 else
3669 /* Otherwise, try mvn. */
3670 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3672 case PLUS:
3673 /* See if we can use addw or subw. */
3674 if (TARGET_THUMB2
3675 && ((i & 0xfffff000) == 0
3676 || ((-i) & 0xfffff000) == 0))
3677 return 1;
3678 /* else fall through. */
3680 case COMPARE:
3681 case EQ:
3682 case NE:
3683 case GT:
3684 case LE:
3685 case LT:
3686 case GE:
3687 case GEU:
3688 case LTU:
3689 case GTU:
3690 case LEU:
3691 case UNORDERED:
3692 case ORDERED:
3693 case UNEQ:
3694 case UNGE:
3695 case UNLT:
3696 case UNGT:
3697 case UNLE:
3698 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3700 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3701 case XOR:
3702 return 0;
3704 case IOR:
3705 if (TARGET_THUMB2)
3706 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3707 return 0;
3709 case AND:
3710 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3712 default:
3713 gcc_unreachable ();
3717 /* Return true if I is a valid di mode constant for the operation CODE. */
3719 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3721 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3722 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3723 rtx hi = GEN_INT (hi_val);
3724 rtx lo = GEN_INT (lo_val);
3726 if (TARGET_THUMB1)
3727 return 0;
3729 switch (code)
3731 case AND:
3732 case IOR:
3733 case XOR:
3734 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3735 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3736 case PLUS:
3737 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3739 default:
3740 return 0;
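/* For illustration: a DImode IOR with 0xff000000000000ff is acceptable,
   because each 32-bit half (0xff000000 and 0xff) is individually
   encodable, whereas a constant whose high half is 0x12345678 is not and
   would have to be loaded into a register first.  */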
3744 /* Emit a sequence of insns to handle a large constant.
3745 CODE is the code of the operation required, it can be any of SET, PLUS,
3746 IOR, AND, XOR, MINUS;
3747 MODE is the mode in which the operation is being performed;
3748 VAL is the integer to operate on;
3749 SOURCE is the other operand (a register, or a null-pointer for SET);
3750 SUBTARGETS means it is safe to create scratch registers if that will
3751 either produce a simpler sequence, or if we will want to CSE the values.
3752 Return value is the number of insns emitted. */
3754 /* ??? Tweak this for thumb2. */
3756 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3757 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3759 rtx cond;
3761 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3762 cond = COND_EXEC_TEST (PATTERN (insn));
3763 else
3764 cond = NULL_RTX;
3766 if (subtargets || code == SET
3767 || (REG_P (target) && REG_P (source)
3768 && REGNO (target) != REGNO (source)))
3770 /* After arm_reorg has been called, we can't fix up expensive
3771 constants by pushing them into memory so we must synthesize
3772 them in-line, regardless of the cost. This is only likely to
3773 be more costly on chips that have load delay slots and we are
3774 compiling without running the scheduler (so no splitting
3775 occurred before the final instruction emission).
3777 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3779 if (!cfun->machine->after_arm_reorg
3780 && !cond
3781 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3782 1, 0)
3783 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3784 + (code != SET))))
3786 if (code == SET)
3788 /* Currently SET is the only monadic value for CODE, all
3789 the rest are dyadic. */
3790 if (TARGET_USE_MOVT)
3791 arm_emit_movpair (target, GEN_INT (val));
3792 else
3793 emit_set_insn (target, GEN_INT (val));
3795 return 1;
3797 else
3799 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3801 if (TARGET_USE_MOVT)
3802 arm_emit_movpair (temp, GEN_INT (val));
3803 else
3804 emit_set_insn (temp, GEN_INT (val));
3806 /* For MINUS, the constant is the value subtracted from (val - source),
3807 since we never have subtraction of a constant. */
3808 if (code == MINUS)
3809 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3810 else
3811 emit_set_insn (target,
3812 gen_rtx_fmt_ee (code, mode, source, temp));
3813 return 2;
3818 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3822 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3823 ARM/THUMB2 immediates, and add up to VAL.
3824 The function return value gives the number of insns required. */
3825 static int
3826 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3827 struct four_ints *return_sequence)
3829 int best_consecutive_zeros = 0;
3830 int i;
3831 int best_start = 0;
3832 int insns1, insns2;
3833 struct four_ints tmp_sequence;
3835 /* If we aren't targeting ARM, the best place to start is always at
3836 the bottom, otherwise look more closely. */
3837 if (TARGET_ARM)
3839 for (i = 0; i < 32; i += 2)
3841 int consecutive_zeros = 0;
3843 if (!(val & (3 << i)))
3845 while ((i < 32) && !(val & (3 << i)))
3847 consecutive_zeros += 2;
3848 i += 2;
3850 if (consecutive_zeros > best_consecutive_zeros)
3852 best_consecutive_zeros = consecutive_zeros;
3853 best_start = i - consecutive_zeros;
3855 i -= 2;
3860 /* So long as it won't require any more insns to do so, it's
3861 desirable to emit a small constant (in bits 0...9) in the last
3862 insn. This way there is more chance that it can be combined with
3863 a later addressing insn to form a pre-indexed load or store
3864 operation. Consider:
3866 *((volatile int *)0xe0000100) = 1;
3867 *((volatile int *)0xe0000110) = 2;
3869 We want this to wind up as:
3871 mov rA, #0xe0000000
3872 mov rB, #1
3873 str rB, [rA, #0x100]
3874 mov rB, #2
3875 str rB, [rA, #0x110]
3877 rather than having to synthesize both large constants from scratch.
3879 Therefore, we calculate how many insns would be required to emit
3880 the constant starting from `best_start', and also starting from
3881 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3882 yield a shorter sequence, we may as well use zero. */
3883 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3884 if (best_start != 0
3885 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3887 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3888 if (insns2 <= insns1)
3890 *return_sequence = tmp_sequence;
3891 insns1 = insns2;
3895 return insns1;
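/* As a rough illustration of the above: without movw/movt, a constant
   such as 0x12345678 cannot be encoded directly and is synthesized one
   rotated 8-bit chunk at a time, e.g.

       mov     r0, #0x12000000
       add     r0, r0, #0x340000
       add     r0, r0, #0x5600
       add     r0, r0, #0x78

   i.e. four instructions, the worst case this code allows.  */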
3898 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3899 static int
3900 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3901 struct four_ints *return_sequence, int i)
3903 int remainder = val & 0xffffffff;
3904 int insns = 0;
3906 /* Try and find a way of doing the job in either two or three
3907 instructions.
3909 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3910 location. We start at position I. This may be the MSB, or
3911 optimal_immediate_sequence may have positioned it at the largest block
3912 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3913 wrapping around to the top of the word when we drop off the bottom.
3914 In the worst case this code should produce no more than four insns.
3916 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3917 constants, shifted to any arbitrary location. We should always start
3918 at the MSB. */
3921 int end;
3922 unsigned int b1, b2, b3, b4;
3923 unsigned HOST_WIDE_INT result;
3924 int loc;
3926 gcc_assert (insns < 4);
3928 if (i <= 0)
3929 i += 32;
3931 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3932 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3934 loc = i;
3935 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3936 /* We can use addw/subw for the last 12 bits. */
3937 result = remainder;
3938 else
3940 /* Use an 8-bit shifted/rotated immediate. */
3941 end = i - 8;
3942 if (end < 0)
3943 end += 32;
3944 result = remainder & ((0x0ff << end)
3945 | ((i < end) ? (0xff >> (32 - end))
3946 : 0));
3947 i -= 8;
3950 else
3952 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3953 arbitrary shifts. */
3954 i -= TARGET_ARM ? 2 : 1;
3955 continue;
3958 /* Next, see if we can do a better job with a thumb2 replicated
3959 constant.
3961 We do it this way around to catch the cases like 0x01F001E0 where
3962 two 8-bit immediates would work, but a replicated constant would
3963 make it worse.
3965 TODO: 16-bit constants that don't clear all the bits, but still win.
3966 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3967 if (TARGET_THUMB2)
3969 b1 = (remainder & 0xff000000) >> 24;
3970 b2 = (remainder & 0x00ff0000) >> 16;
3971 b3 = (remainder & 0x0000ff00) >> 8;
3972 b4 = remainder & 0xff;
3974 if (loc > 24)
3976 /* The 8-bit immediate already found clears b1 (and maybe b2),
3977 but must leave b3 and b4 alone. */
3979 /* First try to find a 32-bit replicated constant that clears
3980 almost everything. We can assume that we can't do it in one,
3981 or else we wouldn't be here. */
3982 unsigned int tmp = b1 & b2 & b3 & b4;
3983 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3984 + (tmp << 24);
3985 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3986 + (tmp == b3) + (tmp == b4);
3987 if (tmp
3988 && (matching_bytes >= 3
3989 || (matching_bytes == 2
3990 && const_ok_for_op (remainder & ~tmp2, code))))
3992 /* At least 3 of the bytes match, and the fourth has at
3993 least as many bits set, or two of the bytes match
3994 and it will only require one more insn to finish. */
3995 result = tmp2;
3996 i = tmp != b1 ? 32
3997 : tmp != b2 ? 24
3998 : tmp != b3 ? 16
3999 : 8;
4002 /* Second, try to find a 16-bit replicated constant that can
4003 leave three of the bytes clear. If b2 or b4 is already
4004 zero, then we can. If the 8-bit from above would not
4005 clear b2 anyway, then we still win. */
4006 else if (b1 == b3 && (!b2 || !b4
4007 || (remainder & 0x00ff0000 & ~result)))
4009 result = remainder & 0xff00ff00;
4010 i = 24;
4013 else if (loc > 16)
4015 /* The 8-bit immediate already found clears b2 (and maybe b3)
4016 and we don't get here unless b1 is already clear, but it will
4017 leave b4 unchanged. */
4019 /* If we can clear b2 and b4 at once, then we win, since the
4020 8-bits couldn't possibly reach that far. */
4021 if (b2 == b4)
4023 result = remainder & 0x00ff00ff;
4024 i = 16;
4029 return_sequence->i[insns++] = result;
4030 remainder &= ~result;
4032 if (code == SET || code == MINUS)
4033 code = PLUS;
4035 while (remainder);
4037 return insns;
4040 /* Emit an instruction with the indicated PATTERN. If COND is
4041 non-NULL, conditionalize the execution of the instruction on COND
4042 being true. */
4044 static void
4045 emit_constant_insn (rtx cond, rtx pattern)
4047 if (cond)
4048 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4049 emit_insn (pattern);
4052 /* As above, but extra parameter GENERATE which, if clear, suppresses
4053 RTL generation. */
4055 static int
4056 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4057 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4058 int generate)
4060 int can_invert = 0;
4061 int can_negate = 0;
4062 int final_invert = 0;
4063 int i;
4064 int set_sign_bit_copies = 0;
4065 int clear_sign_bit_copies = 0;
4066 int clear_zero_bit_copies = 0;
4067 int set_zero_bit_copies = 0;
4068 int insns = 0, neg_insns, inv_insns;
4069 unsigned HOST_WIDE_INT temp1, temp2;
4070 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4071 struct four_ints *immediates;
4072 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4074 /* Find out which operations are safe for a given CODE. Also do a quick
4075 check for degenerate cases; these can occur when DImode operations
4076 are split. */
4077 switch (code)
4079 case SET:
4080 can_invert = 1;
4081 break;
4083 case PLUS:
4084 can_negate = 1;
4085 break;
4087 case IOR:
4088 if (remainder == 0xffffffff)
4090 if (generate)
4091 emit_constant_insn (cond,
4092 gen_rtx_SET (VOIDmode, target,
4093 GEN_INT (ARM_SIGN_EXTEND (val))));
4094 return 1;
4097 if (remainder == 0)
4099 if (reload_completed && rtx_equal_p (target, source))
4100 return 0;
4102 if (generate)
4103 emit_constant_insn (cond,
4104 gen_rtx_SET (VOIDmode, target, source));
4105 return 1;
4107 break;
4109 case AND:
4110 if (remainder == 0)
4112 if (generate)
4113 emit_constant_insn (cond,
4114 gen_rtx_SET (VOIDmode, target, const0_rtx));
4115 return 1;
4117 if (remainder == 0xffffffff)
4119 if (reload_completed && rtx_equal_p (target, source))
4120 return 0;
4121 if (generate)
4122 emit_constant_insn (cond,
4123 gen_rtx_SET (VOIDmode, target, source));
4124 return 1;
4126 can_invert = 1;
4127 break;
4129 case XOR:
4130 if (remainder == 0)
4132 if (reload_completed && rtx_equal_p (target, source))
4133 return 0;
4134 if (generate)
4135 emit_constant_insn (cond,
4136 gen_rtx_SET (VOIDmode, target, source));
4137 return 1;
4140 if (remainder == 0xffffffff)
4142 if (generate)
4143 emit_constant_insn (cond,
4144 gen_rtx_SET (VOIDmode, target,
4145 gen_rtx_NOT (mode, source)));
4146 return 1;
4148 final_invert = 1;
4149 break;
4151 case MINUS:
4152 /* We treat MINUS as (val - source), since (source - val) is always
4153 passed as (source + (-val)). */
4154 if (remainder == 0)
4156 if (generate)
4157 emit_constant_insn (cond,
4158 gen_rtx_SET (VOIDmode, target,
4159 gen_rtx_NEG (mode, source)));
4160 return 1;
4162 if (const_ok_for_arm (val))
4164 if (generate)
4165 emit_constant_insn (cond,
4166 gen_rtx_SET (VOIDmode, target,
4167 gen_rtx_MINUS (mode, GEN_INT (val),
4168 source)));
4169 return 1;
4172 break;
4174 default:
4175 gcc_unreachable ();
4178 /* If we can do it in one insn get out quickly. */
4179 if (const_ok_for_op (val, code))
4181 if (generate)
4182 emit_constant_insn (cond,
4183 gen_rtx_SET (VOIDmode, target,
4184 (source
4185 ? gen_rtx_fmt_ee (code, mode, source,
4186 GEN_INT (val))
4187 : GEN_INT (val))));
4188 return 1;
4191 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4192 insn. */
4193 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4194 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4196 if (generate)
4198 if (mode == SImode && i == 16)
4199 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4200 smaller insn. */
4201 emit_constant_insn (cond,
4202 gen_zero_extendhisi2
4203 (target, gen_lowpart (HImode, source)));
4204 else
4205 /* Extz only supports SImode, but we can coerce the operands
4206 into that mode. */
4207 emit_constant_insn (cond,
4208 gen_extzv_t2 (gen_lowpart (SImode, target),
4209 gen_lowpart (SImode, source),
4210 GEN_INT (i), const0_rtx));
4213 return 1;
4216 /* Calculate a few attributes that may be useful for specific
4217 optimizations. */
4218 /* Count number of leading zeros. */
4219 for (i = 31; i >= 0; i--)
4221 if ((remainder & (1 << i)) == 0)
4222 clear_sign_bit_copies++;
4223 else
4224 break;
4227 /* Count number of leading 1's. */
4228 for (i = 31; i >= 0; i--)
4230 if ((remainder & (1 << i)) != 0)
4231 set_sign_bit_copies++;
4232 else
4233 break;
4236 /* Count number of trailing zero's. */
4237 for (i = 0; i <= 31; i++)
4239 if ((remainder & (1 << i)) == 0)
4240 clear_zero_bit_copies++;
4241 else
4242 break;
4245 /* Count number of trailing 1's. */
4246 for (i = 0; i <= 31; i++)
4248 if ((remainder & (1 << i)) != 0)
4249 set_zero_bit_copies++;
4250 else
4251 break;
4254 switch (code)
4256 case SET:
4257 /* See if we can do this by sign_extending a constant that is known
4258 to be negative. This is a good way of doing it, since the shift
4259 may well merge into a subsequent insn. */
4260 if (set_sign_bit_copies > 1)
4262 if (const_ok_for_arm
4263 (temp1 = ARM_SIGN_EXTEND (remainder
4264 << (set_sign_bit_copies - 1))))
4266 if (generate)
4268 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4269 emit_constant_insn (cond,
4270 gen_rtx_SET (VOIDmode, new_src,
4271 GEN_INT (temp1)));
4272 emit_constant_insn (cond,
4273 gen_ashrsi3 (target, new_src,
4274 GEN_INT (set_sign_bit_copies - 1)));
4276 return 2;
4278 /* For an inverted constant, we will need to set the low bits,
4279 these will be shifted out of harm's way. */
4280 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4281 if (const_ok_for_arm (~temp1))
4283 if (generate)
4285 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4286 emit_constant_insn (cond,
4287 gen_rtx_SET (VOIDmode, new_src,
4288 GEN_INT (temp1)));
4289 emit_constant_insn (cond,
4290 gen_ashrsi3 (target, new_src,
4291 GEN_INT (set_sign_bit_copies - 1)));
4293 return 2;
4297 /* See if we can calculate the value as the difference between two
4298 valid immediates. */
4299 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4301 int topshift = clear_sign_bit_copies & ~1;
4303 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4304 & (0xff000000 >> topshift));
4306 /* If temp1 is zero, then that means the 9 most significant
4307 bits of remainder were 1 and we've caused it to overflow.
4308 When topshift is 0 we don't need to do anything since we
4309 can borrow from 'bit 32'. */
4310 if (temp1 == 0 && topshift != 0)
4311 temp1 = 0x80000000 >> (topshift - 1);
4313 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4315 if (const_ok_for_arm (temp2))
4317 if (generate)
4319 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4320 emit_constant_insn (cond,
4321 gen_rtx_SET (VOIDmode, new_src,
4322 GEN_INT (temp1)));
4323 emit_constant_insn (cond,
4324 gen_addsi3 (target, new_src,
4325 GEN_INT (-temp2)));
4328 return 2;
4332 /* See if we can generate this by setting the bottom (or the top)
4333 16 bits, and then shifting these into the other half of the
4334 word. We only look for the simplest cases, to do more would cost
4335 too much. Be careful, however, not to generate this when the
4336 alternative would take fewer insns. */
4337 if (val & 0xffff0000)
4339 temp1 = remainder & 0xffff0000;
4340 temp2 = remainder & 0x0000ffff;
4342 /* Overlaps outside this range are best done using other methods. */
4343 for (i = 9; i < 24; i++)
4345 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4346 && !const_ok_for_arm (temp2))
4348 rtx new_src = (subtargets
4349 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4350 : target);
4351 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4352 source, subtargets, generate);
4353 source = new_src;
4354 if (generate)
4355 emit_constant_insn
4356 (cond,
4357 gen_rtx_SET
4358 (VOIDmode, target,
4359 gen_rtx_IOR (mode,
4360 gen_rtx_ASHIFT (mode, source,
4361 GEN_INT (i)),
4362 source)));
4363 return insns + 1;
4367 /* Don't duplicate cases already considered. */
4368 for (i = 17; i < 24; i++)
4370 if (((temp1 | (temp1 >> i)) == remainder)
4371 && !const_ok_for_arm (temp1))
4373 rtx new_src = (subtargets
4374 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4375 : target);
4376 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4377 source, subtargets, generate);
4378 source = new_src;
4379 if (generate)
4380 emit_constant_insn
4381 (cond,
4382 gen_rtx_SET (VOIDmode, target,
4383 gen_rtx_IOR
4384 (mode,
4385 gen_rtx_LSHIFTRT (mode, source,
4386 GEN_INT (i)),
4387 source)));
4388 return insns + 1;
4392 break;
4394 case IOR:
4395 case XOR:
4396 /* If we have IOR or XOR, and the constant can be loaded in a
4397 single instruction, and we can find a temporary to put it in,
4398 then this can be done in two instructions instead of 3-4. */
4399 if (subtargets
4400 /* TARGET can't be NULL if SUBTARGETS is 0 */
4401 || (reload_completed && !reg_mentioned_p (target, source)))
4403 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4405 if (generate)
4407 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4409 emit_constant_insn (cond,
4410 gen_rtx_SET (VOIDmode, sub,
4411 GEN_INT (val)));
4412 emit_constant_insn (cond,
4413 gen_rtx_SET (VOIDmode, target,
4414 gen_rtx_fmt_ee (code, mode,
4415 source, sub)));
4417 return 2;
4421 if (code == XOR)
4422 break;
4424 /* Convert.
4425 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4426 and 0s in the remainder, e.g. 0xfff00000)
4427 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4429 This can be done in 2 instructions by using shifts with mov or mvn.
4430 e.g. for
4431 x = x | 0xfff00000;
4432 we generate.
4433 mvn r0, r0, asl #12
4434 mvn r0, r0, lsr #12 */
4435 if (set_sign_bit_copies > 8
4436 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4438 if (generate)
4440 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4441 rtx shift = GEN_INT (set_sign_bit_copies);
4443 emit_constant_insn
4444 (cond,
4445 gen_rtx_SET (VOIDmode, sub,
4446 gen_rtx_NOT (mode,
4447 gen_rtx_ASHIFT (mode,
4448 source,
4449 shift))));
4450 emit_constant_insn
4451 (cond,
4452 gen_rtx_SET (VOIDmode, target,
4453 gen_rtx_NOT (mode,
4454 gen_rtx_LSHIFTRT (mode, sub,
4455 shift))));
4457 return 2;
4460 /* Convert
4461 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4463 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4465 E.g. for r0 = r0 | 0xfff
4466 mvn r0, r0, lsr #12
4467 mvn r0, r0, asl #12
4470 if (set_zero_bit_copies > 8
4471 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4473 if (generate)
4475 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4476 rtx shift = GEN_INT (set_zero_bit_copies);
4478 emit_constant_insn
4479 (cond,
4480 gen_rtx_SET (VOIDmode, sub,
4481 gen_rtx_NOT (mode,
4482 gen_rtx_LSHIFTRT (mode,
4483 source,
4484 shift))));
4485 emit_constant_insn
4486 (cond,
4487 gen_rtx_SET (VOIDmode, target,
4488 gen_rtx_NOT (mode,
4489 gen_rtx_ASHIFT (mode, sub,
4490 shift))));
4492 return 2;
4495 /* This will never be reached for Thumb2 because orn is a valid
4496 instruction. This is for Thumb1 and the ARM 32 bit cases.
4498 x = y | constant (such that ~constant is a valid constant)
4499 Transform this to
4500 x = ~(~y & ~constant).
4502 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4504 if (generate)
4506 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4507 emit_constant_insn (cond,
4508 gen_rtx_SET (VOIDmode, sub,
4509 gen_rtx_NOT (mode, source)));
4510 source = sub;
4511 if (subtargets)
4512 sub = gen_reg_rtx (mode);
4513 emit_constant_insn (cond,
4514 gen_rtx_SET (VOIDmode, sub,
4515 gen_rtx_AND (mode, source,
4516 GEN_INT (temp1))));
4517 emit_constant_insn (cond,
4518 gen_rtx_SET (VOIDmode, target,
4519 gen_rtx_NOT (mode, sub)));
4521 return 3;
4523 break;
4525 case AND:
4526 /* See if two shifts will do 2 or more insn's worth of work. */
4527 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4529 HOST_WIDE_INT shift_mask = ((0xffffffff
4530 << (32 - clear_sign_bit_copies))
4531 & 0xffffffff);
4533 if ((remainder | shift_mask) != 0xffffffff)
4535 if (generate)
4537 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4538 insns = arm_gen_constant (AND, mode, cond,
4539 remainder | shift_mask,
4540 new_src, source, subtargets, 1);
4541 source = new_src;
4543 else
4545 rtx targ = subtargets ? NULL_RTX : target;
4546 insns = arm_gen_constant (AND, mode, cond,
4547 remainder | shift_mask,
4548 targ, source, subtargets, 0);
4552 if (generate)
4554 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4555 rtx shift = GEN_INT (clear_sign_bit_copies);
4557 emit_insn (gen_ashlsi3 (new_src, source, shift));
4558 emit_insn (gen_lshrsi3 (target, new_src, shift));
4561 return insns + 2;
4564 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4566 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4568 if ((remainder | shift_mask) != 0xffffffff)
4570 if (generate)
4572 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4574 insns = arm_gen_constant (AND, mode, cond,
4575 remainder | shift_mask,
4576 new_src, source, subtargets, 1);
4577 source = new_src;
4579 else
4581 rtx targ = subtargets ? NULL_RTX : target;
4583 insns = arm_gen_constant (AND, mode, cond,
4584 remainder | shift_mask,
4585 targ, source, subtargets, 0);
4589 if (generate)
4591 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4592 rtx shift = GEN_INT (clear_zero_bit_copies);
4594 emit_insn (gen_lshrsi3 (new_src, source, shift));
4595 emit_insn (gen_ashlsi3 (target, new_src, shift));
4598 return insns + 2;
4601 break;
4603 default:
4604 break;
4607 /* Calculate what the instruction sequences would be if we generated it
4608 normally, negated, or inverted. */
4609 if (code == AND)
4610 /* AND cannot be split into multiple insns, so invert and use BIC. */
4611 insns = 99;
4612 else
4613 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4615 if (can_negate)
4616 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4617 &neg_immediates);
4618 else
4619 neg_insns = 99;
4621 if (can_invert || final_invert)
4622 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4623 &inv_immediates);
4624 else
4625 inv_insns = 99;
4627 immediates = &pos_immediates;
4629 /* Is the negated immediate sequence more efficient? */
4630 if (neg_insns < insns && neg_insns <= inv_insns)
4632 insns = neg_insns;
4633 immediates = &neg_immediates;
4635 else
4636 can_negate = 0;
4638 /* Is the inverted immediate sequence more efficient?
4639 We must allow for an extra NOT instruction for XOR operations, although
4640 there is some chance that the final 'mvn' will get optimized later. */
4641 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4643 insns = inv_insns;
4644 immediates = &inv_immediates;
4646 else
4648 can_invert = 0;
4649 final_invert = 0;
4652 /* Now output the chosen sequence as instructions. */
4653 if (generate)
4655 for (i = 0; i < insns; i++)
4657 rtx new_src, temp1_rtx;
4659 temp1 = immediates->i[i];
4661 if (code == SET || code == MINUS)
4662 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4663 else if ((final_invert || i < (insns - 1)) && subtargets)
4664 new_src = gen_reg_rtx (mode);
4665 else
4666 new_src = target;
4668 if (can_invert)
4669 temp1 = ~temp1;
4670 else if (can_negate)
4671 temp1 = -temp1;
4673 temp1 = trunc_int_for_mode (temp1, mode);
4674 temp1_rtx = GEN_INT (temp1);
4676 if (code == SET)
4678 else if (code == MINUS)
4679 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4680 else
4681 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4683 emit_constant_insn (cond,
4684 gen_rtx_SET (VOIDmode, new_src,
4685 temp1_rtx));
4686 source = new_src;
4688 if (code == SET)
4690 can_negate = can_invert;
4691 can_invert = 0;
4692 code = PLUS;
4694 else if (code == MINUS)
4695 code = PLUS;
4699 if (final_invert)
4701 if (generate)
4702 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4703 gen_rtx_NOT (mode, source)));
4704 insns++;
4707 return insns;
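/* For illustration of the AND handling above: a mask such as 0xffffff00
   is not itself a valid immediate, but its inverse 0xff is, so the
   operation is emitted as a single

       bic     r0, r0, #0xff

   rather than being split into several AND instructions.  */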
4710 /* Canonicalize a comparison so that we are more likely to recognize it.
4711 This can be done for a few constant compares, where we can make the
4712 immediate value easier to load. */
4714 static void
4715 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4716 bool op0_preserve_value)
4718 machine_mode mode;
4719 unsigned HOST_WIDE_INT i, maxval;
4721 mode = GET_MODE (*op0);
4722 if (mode == VOIDmode)
4723 mode = GET_MODE (*op1);
4725 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4727 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4728 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4729 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4730 for GTU/LEU in Thumb mode. */
4731 if (mode == DImode)
4734 if (*code == GT || *code == LE
4735 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4737 /* Missing comparison. First try to use an available
4738 comparison. */
4739 if (CONST_INT_P (*op1))
4741 i = INTVAL (*op1);
4742 switch (*code)
4744 case GT:
4745 case LE:
4746 if (i != maxval
4747 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4749 *op1 = GEN_INT (i + 1);
4750 *code = *code == GT ? GE : LT;
4751 return;
4753 break;
4754 case GTU:
4755 case LEU:
4756 if (i != ~((unsigned HOST_WIDE_INT) 0)
4757 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4759 *op1 = GEN_INT (i + 1);
4760 *code = *code == GTU ? GEU : LTU;
4761 return;
4763 break;
4764 default:
4765 gcc_unreachable ();
4769 /* If that did not work, reverse the condition. */
4770 if (!op0_preserve_value)
4772 std::swap (*op0, *op1);
4773 *code = (int)swap_condition ((enum rtx_code)*code);
4776 return;
4779 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4780 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4781 to facilitate possible combining with a cmp into 'ands'. */
4782 if (mode == SImode
4783 && GET_CODE (*op0) == ZERO_EXTEND
4784 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4785 && GET_MODE (XEXP (*op0, 0)) == QImode
4786 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4787 && subreg_lowpart_p (XEXP (*op0, 0))
4788 && *op1 == const0_rtx)
4789 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4790 GEN_INT (255));
4792 /* Comparisons smaller than DImode. Only adjust comparisons against
4793 an out-of-range constant. */
4794 if (!CONST_INT_P (*op1)
4795 || const_ok_for_arm (INTVAL (*op1))
4796 || const_ok_for_arm (- INTVAL (*op1)))
4797 return;
4799 i = INTVAL (*op1);
4801 switch (*code)
4803 case EQ:
4804 case NE:
4805 return;
4807 case GT:
4808 case LE:
4809 if (i != maxval
4810 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4812 *op1 = GEN_INT (i + 1);
4813 *code = *code == GT ? GE : LT;
4814 return;
4816 break;
4818 case GE:
4819 case LT:
4820 if (i != ~maxval
4821 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4823 *op1 = GEN_INT (i - 1);
4824 *code = *code == GE ? GT : LE;
4825 return;
4827 break;
4829 case GTU:
4830 case LEU:
4831 if (i != ~((unsigned HOST_WIDE_INT) 0)
4832 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4834 *op1 = GEN_INT (i + 1);
4835 *code = *code == GTU ? GEU : LTU;
4836 return;
4838 break;
4840 case GEU:
4841 case LTU:
4842 if (i != 0
4843 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4845 *op1 = GEN_INT (i - 1);
4846 *code = *code == GEU ? GTU : LEU;
4847 return;
4849 break;
4851 default:
4852 gcc_unreachable ();
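/* For illustration: an unsigned comparison such as (x <= 0xffff) uses a
   constant that cannot be encoded, so the code above rewrites it as
   (x < 0x10000), i.e. LEU becomes LTU with an immediate (1 << 16) that a
   single cmp instruction can take.  */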
4857 /* Define how to find the value returned by a function. */
4859 static rtx
4860 arm_function_value(const_tree type, const_tree func,
4861 bool outgoing ATTRIBUTE_UNUSED)
4863 machine_mode mode;
4864 int unsignedp ATTRIBUTE_UNUSED;
4865 rtx r ATTRIBUTE_UNUSED;
4867 mode = TYPE_MODE (type);
4869 if (TARGET_AAPCS_BASED)
4870 return aapcs_allocate_return_reg (mode, type, func);
4872 /* Promote integer types. */
4873 if (INTEGRAL_TYPE_P (type))
4874 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4876 /* Promotes small structs returned in a register to full-word size
4877 for big-endian AAPCS. */
4878 if (arm_return_in_msb (type))
4880 HOST_WIDE_INT size = int_size_in_bytes (type);
4881 if (size % UNITS_PER_WORD != 0)
4883 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4884 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4888 return arm_libcall_value_1 (mode);
4891 /* libcall hashtable helpers. */
4893 struct libcall_hasher : typed_noop_remove <rtx_def>
4895 typedef rtx_def value_type;
4896 typedef rtx_def compare_type;
4897 static inline hashval_t hash (const value_type *);
4898 static inline bool equal (const value_type *, const compare_type *);
4899 static inline void remove (value_type *);
4902 inline bool
4903 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4905 return rtx_equal_p (p1, p2);
4908 inline hashval_t
4909 libcall_hasher::hash (const value_type *p1)
4911 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4914 typedef hash_table<libcall_hasher> libcall_table_type;
4916 static void
4917 add_libcall (libcall_table_type *htab, rtx libcall)
4919 *htab->find_slot (libcall, INSERT) = libcall;
4922 static bool
4923 arm_libcall_uses_aapcs_base (const_rtx libcall)
4925 static bool init_done = false;
4926 static libcall_table_type *libcall_htab = NULL;
4928 if (!init_done)
4930 init_done = true;
4932 libcall_htab = new libcall_table_type (31);
4933 add_libcall (libcall_htab,
4934 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4935 add_libcall (libcall_htab,
4936 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4937 add_libcall (libcall_htab,
4938 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4939 add_libcall (libcall_htab,
4940 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4942 add_libcall (libcall_htab,
4943 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4944 add_libcall (libcall_htab,
4945 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4946 add_libcall (libcall_htab,
4947 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4948 add_libcall (libcall_htab,
4949 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4951 add_libcall (libcall_htab,
4952 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4953 add_libcall (libcall_htab,
4954 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4955 add_libcall (libcall_htab,
4956 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4957 add_libcall (libcall_htab,
4958 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4959 add_libcall (libcall_htab,
4960 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4961 add_libcall (libcall_htab,
4962 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4963 add_libcall (libcall_htab,
4964 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4965 add_libcall (libcall_htab,
4966 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4968 /* Values from double-precision helper functions are returned in core
4969 registers if the selected core only supports single-precision
4970 arithmetic, even if we are using the hard-float ABI. The same is
4971 true for single-precision helpers, but we will never be using the
4972 hard-float ABI on a CPU which doesn't support single-precision
4973 operations in hardware. */
4974 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4975 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4976 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4977 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4978 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4979 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4980 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4981 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4982 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4983 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4984 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4985 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4986 SFmode));
4987 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4988 DFmode));
4991 return libcall && libcall_htab->find (libcall) != NULL;
4994 static rtx
4995 arm_libcall_value_1 (machine_mode mode)
4997 if (TARGET_AAPCS_BASED)
4998 return aapcs_libcall_value (mode);
4999 else if (TARGET_IWMMXT_ABI
5000 && arm_vector_mode_supported_p (mode))
5001 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5002 else
5003 return gen_rtx_REG (mode, ARG_REGISTER (1));
5006 /* Define how to find the value returned by a library function
5007 assuming the value has mode MODE. */
5009 static rtx
5010 arm_libcall_value (machine_mode mode, const_rtx libcall)
5012 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5013 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5015 /* The following libcalls return their result in integer registers,
5016 even though they return a floating point value. */
5017 if (arm_libcall_uses_aapcs_base (libcall))
5018 return gen_rtx_REG (mode, ARG_REGISTER(1));
5022 return arm_libcall_value_1 (mode);
5025 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5027 static bool
5028 arm_function_value_regno_p (const unsigned int regno)
5030 if (regno == ARG_REGISTER (1)
5031 || (TARGET_32BIT
5032 && TARGET_AAPCS_BASED
5033 && TARGET_VFP
5034 && TARGET_HARD_FLOAT
5035 && regno == FIRST_VFP_REGNUM)
5036 || (TARGET_IWMMXT_ABI
5037 && regno == FIRST_IWMMXT_REGNUM))
5038 return true;
5040 return false;
5043 /* Determine the amount of memory needed to store the possible return
5044 registers of an untyped call. */
5046 arm_apply_result_size (void)
5048 int size = 16;
5050 if (TARGET_32BIT)
5052 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5053 size += 32;
5054 if (TARGET_IWMMXT_ABI)
5055 size += 8;
5058 return size;
5061 /* Decide whether TYPE should be returned in memory (true)
5062 or in a register (false). FNTYPE is the type of the function making
5063 the call. */
5064 static bool
5065 arm_return_in_memory (const_tree type, const_tree fntype)
5067 HOST_WIDE_INT size;
5069 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5071 if (TARGET_AAPCS_BASED)
5073 /* Simple, non-aggregate types (ie not including vectors and
5074 complex) are always returned in a register (or registers).
5075 We don't care about which register here, so we can short-cut
5076 some of the detail. */
5077 if (!AGGREGATE_TYPE_P (type)
5078 && TREE_CODE (type) != VECTOR_TYPE
5079 && TREE_CODE (type) != COMPLEX_TYPE)
5080 return false;
5082 /* Any return value that is no larger than one word can be
5083 returned in r0. */
5084 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5085 return false;
5087 /* Check any available co-processors to see if they accept the
5088 type as a register candidate (VFP, for example, can return
5089 some aggregates in consecutive registers). These aren't
5090 available if the call is variadic. */
5091 if (aapcs_select_return_coproc (type, fntype) >= 0)
5092 return false;
5094 /* Vector values should be returned using ARM registers, not
5095 memory (unless they're over 16 bytes, which will break since
5096 we only have four call-clobbered registers to play with). */
5097 if (TREE_CODE (type) == VECTOR_TYPE)
5098 return (size < 0 || size > (4 * UNITS_PER_WORD));
5100 /* The rest go in memory. */
5101 return true;
5104 if (TREE_CODE (type) == VECTOR_TYPE)
5105 return (size < 0 || size > (4 * UNITS_PER_WORD));
5107 if (!AGGREGATE_TYPE_P (type) &&
5108 (TREE_CODE (type) != VECTOR_TYPE))
5109 /* All simple types are returned in registers. */
5110 return false;
5112 if (arm_abi != ARM_ABI_APCS)
5114 /* ATPCS and later return aggregate types in memory only if they are
5115 larger than a word (or are variable size). */
5116 return (size < 0 || size > UNITS_PER_WORD);
5119 /* For the arm-wince targets we choose to be compatible with Microsoft's
5120 ARM and Thumb compilers, which always return aggregates in memory. */
5121 #ifndef ARM_WINCE
5122 /* All structures/unions bigger than one word are returned in memory.
5123 Also catch the case where int_size_in_bytes returns -1. In this case
5124 the aggregate is either huge or of variable size, and in either case
5125 we will want to return it via memory and not in a register. */
5126 if (size < 0 || size > UNITS_PER_WORD)
5127 return true;
5129 if (TREE_CODE (type) == RECORD_TYPE)
5131 tree field;
5133 /* For a struct the APCS says that we only return in a register
5134 if the type is 'integer like' and every addressable element
5135 has an offset of zero. For practical purposes this means
5136 that the structure can have at most one non bit-field element
5137 and that this element must be the first one in the structure. */
5139 /* Find the first field, ignoring non FIELD_DECL things which will
5140 have been created by C++. */
5141 for (field = TYPE_FIELDS (type);
5142 field && TREE_CODE (field) != FIELD_DECL;
5143 field = DECL_CHAIN (field))
5144 continue;
5146 if (field == NULL)
5147 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5149 /* Check that the first field is valid for returning in a register. */
5151 /* ... Floats are not allowed */
5152 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5153 return true;
5155 /* ... Aggregates that are not themselves valid for returning in
5156 a register are not allowed. */
5157 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5158 return true;
5160 /* Now check the remaining fields, if any. Only bitfields are allowed,
5161 since they are not addressable. */
5162 for (field = DECL_CHAIN (field);
5163 field;
5164 field = DECL_CHAIN (field))
5166 if (TREE_CODE (field) != FIELD_DECL)
5167 continue;
5169 if (!DECL_BIT_FIELD_TYPE (field))
5170 return true;
5173 return false;
5176 if (TREE_CODE (type) == UNION_TYPE)
5178 tree field;
5180 /* Unions can be returned in registers if every element is
5181 integral, or can be returned in an integer register. */
5182 for (field = TYPE_FIELDS (type);
5183 field;
5184 field = DECL_CHAIN (field))
5186 if (TREE_CODE (field) != FIELD_DECL)
5187 continue;
5189 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5190 return true;
5192 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5193 return true;
5196 return false;
5198 #endif /* not ARM_WINCE */
5200 /* Return all other types in memory. */
5201 return true;
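/* For illustration, under AAPCS a struct { int a; } is returned in r0, a
   struct { int a, b; } is larger than one word and so comes back in
   memory, while a struct { float x, y; } may be returned in s0/s1 when
   the hard-float (aapcs-vfp) variant applies, via the co-processor check
   above.  */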
5204 const struct pcs_attribute_arg
5206 const char *arg;
5207 enum arm_pcs value;
5208 } pcs_attribute_args[] =
5210 {"aapcs", ARM_PCS_AAPCS},
5211 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5212 #if 0
5213 /* We could recognize these, but changes would be needed elsewhere
5214 * to implement them. */
5215 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5216 {"atpcs", ARM_PCS_ATPCS},
5217 {"apcs", ARM_PCS_APCS},
5218 #endif
5219 {NULL, ARM_PCS_UNKNOWN}
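/* For illustration, the attribute recognized above is used as, e.g.,

       double f (double) __attribute__ ((pcs ("aapcs")));

   which forces the base (core-register) convention for that function
   even when the compilation default is aapcs-vfp.  */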
5222 static enum arm_pcs
5223 arm_pcs_from_attribute (tree attr)
5225 const struct pcs_attribute_arg *ptr;
5226 const char *arg;
5228 /* Get the value of the argument. */
5229 if (TREE_VALUE (attr) == NULL_TREE
5230 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5231 return ARM_PCS_UNKNOWN;
5233 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5235 /* Check it against the list of known arguments. */
5236 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5237 if (streq (arg, ptr->arg))
5238 return ptr->value;
5240 /* An unrecognized PCS variant. */
5241 return ARM_PCS_UNKNOWN;
5244 /* Get the PCS variant to use for this call. TYPE is the function's type
5245 specification, DECL is the specific declaration. DECL may be null if
5246 the call could be indirect or if this is a library call. */
5247 static enum arm_pcs
5248 arm_get_pcs_model (const_tree type, const_tree decl)
5250 bool user_convention = false;
5251 enum arm_pcs user_pcs = arm_pcs_default;
5252 tree attr;
5254 gcc_assert (type);
5256 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5257 if (attr)
5259 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5260 user_convention = true;
5263 if (TARGET_AAPCS_BASED)
5265 /* Detect varargs functions. These always use the base rules
5266 (no argument is ever a candidate for a co-processor
5267 register). */
5268 bool base_rules = stdarg_p (type);
5270 if (user_convention)
5272 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5273 sorry ("non-AAPCS derived PCS variant");
5274 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5275 error ("variadic functions must use the base AAPCS variant");
5278 if (base_rules)
5279 return ARM_PCS_AAPCS;
5280 else if (user_convention)
5281 return user_pcs;
5282 else if (decl && flag_unit_at_a_time)
5284 /* Local functions never leak outside this compilation unit,
5285 so we are free to use whatever conventions are
5286 appropriate. */
5287 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5288 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5289 if (i && i->local)
5290 return ARM_PCS_AAPCS_LOCAL;
5293 else if (user_convention && user_pcs != arm_pcs_default)
5294 sorry ("PCS variant");
5296 /* For everything else we use the target's default. */
5297 return arm_pcs_default;
5301 static void
5302 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5303 const_tree fntype ATTRIBUTE_UNUSED,
5304 rtx libcall ATTRIBUTE_UNUSED,
5305 const_tree fndecl ATTRIBUTE_UNUSED)
5307 /* Record the unallocated VFP registers. */
5308 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5309 pcum->aapcs_vfp_reg_alloc = 0;
5312 /* Walk down the type tree of TYPE counting consecutive base elements.
5313 If *MODEP is VOIDmode, then set it to the first valid floating point
5314 type. If a non-floating point type is found, or if a floating point
5315 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5316 otherwise return the count in the sub-tree. */
5317 static int
5318 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5320 machine_mode mode;
5321 HOST_WIDE_INT size;
5323 switch (TREE_CODE (type))
5325 case REAL_TYPE:
5326 mode = TYPE_MODE (type);
5327 if (mode != DFmode && mode != SFmode)
5328 return -1;
5330 if (*modep == VOIDmode)
5331 *modep = mode;
5333 if (*modep == mode)
5334 return 1;
5336 break;
5338 case COMPLEX_TYPE:
5339 mode = TYPE_MODE (TREE_TYPE (type));
5340 if (mode != DFmode && mode != SFmode)
5341 return -1;
5343 if (*modep == VOIDmode)
5344 *modep = mode;
5346 if (*modep == mode)
5347 return 2;
5349 break;
5351 case VECTOR_TYPE:
5352 /* Use V2SImode and V4SImode as representatives of all 64-bit
5353 and 128-bit vector types, whether or not those modes are
5354 supported with the present options. */
5355 size = int_size_in_bytes (type);
5356 switch (size)
5358 case 8:
5359 mode = V2SImode;
5360 break;
5361 case 16:
5362 mode = V4SImode;
5363 break;
5364 default:
5365 return -1;
5368 if (*modep == VOIDmode)
5369 *modep = mode;
5371 /* Vector modes are considered to be opaque: two vectors are
5372 equivalent for the purposes of being homogeneous aggregates
5373 if they are the same size. */
5374 if (*modep == mode)
5375 return 1;
5377 break;
5379 case ARRAY_TYPE:
5381 int count;
5382 tree index = TYPE_DOMAIN (type);
5384 /* Can't handle incomplete types nor sizes that are not
5385 fixed. */
5386 if (!COMPLETE_TYPE_P (type)
5387 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5388 return -1;
5390 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5391 if (count == -1
5392 || !index
5393 || !TYPE_MAX_VALUE (index)
5394 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5395 || !TYPE_MIN_VALUE (index)
5396 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5397 || count < 0)
5398 return -1;
5400 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5401 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5403 /* There must be no padding. */
5404 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5405 return -1;
5407 return count;
5410 case RECORD_TYPE:
5412 int count = 0;
5413 int sub_count;
5414 tree field;
5416 /* Can't handle incomplete types nor sizes that are not
5417 fixed. */
5418 if (!COMPLETE_TYPE_P (type)
5419 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5420 return -1;
5422 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5424 if (TREE_CODE (field) != FIELD_DECL)
5425 continue;
5427 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5428 if (sub_count < 0)
5429 return -1;
5430 count += sub_count;
5433 /* There must be no padding. */
5434 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5435 return -1;
5437 return count;
5440 case UNION_TYPE:
5441 case QUAL_UNION_TYPE:
5443 /* These aren't very interesting except in a degenerate case. */
5444 int count = 0;
5445 int sub_count;
5446 tree field;
5448 /* Can't handle incomplete types nor sizes that are not
5449 fixed. */
5450 if (!COMPLETE_TYPE_P (type)
5451 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5452 return -1;
5454 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5456 if (TREE_CODE (field) != FIELD_DECL)
5457 continue;
5459 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5460 if (sub_count < 0)
5461 return -1;
5462 count = count > sub_count ? count : sub_count;
5465 /* There must be no padding. */
5466 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5467 return -1;
5469 return count;
5472 default:
5473 break;
5476 return -1;
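/* Illustrative examples (hypothetical, not part of GCC) of what the
   walker above accepts as homogeneous-aggregate candidates.  */
#if 0
struct hfa_f3  { float x, y, z; };       /* candidate: *modep = SFmode, count 3.  */
struct hfa_d2  { double re, im; };       /* candidate: *modep = DFmode, count 2.  */
struct mixed   { float f; double d; };   /* rejected: element modes differ.  */
struct too_big { double d[5]; };         /* count 5; rejected by the caller, which
                                            only allows up to 4 elements.  */
#endif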
5479 /* Return true if PCS_VARIANT should use VFP registers. */
5480 static bool
5481 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5483 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5485 static bool seen_thumb1_vfp = false;
5487 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5489 sorry ("Thumb-1 hard-float VFP ABI");
5490 /* sorry() is not immediately fatal, so only display this once. */
5491 seen_thumb1_vfp = true;
5494 return true;
5497 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5498 return false;
5500 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5501 (TARGET_VFP_DOUBLE || !is_double));
5504 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5505 suitable for passing or returning in VFP registers for the PCS
5506 variant selected. If it is, then *BASE_MODE is updated to contain
5507 a machine mode describing each element of the argument's type and
5508 *COUNT to hold the number of such elements. */
5509 static bool
5510 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5511 machine_mode mode, const_tree type,
5512 machine_mode *base_mode, int *count)
5514 machine_mode new_mode = VOIDmode;
5516 /* If we have the type information, prefer that to working things
5517 out from the mode. */
5518 if (type)
5520 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5522 if (ag_count > 0 && ag_count <= 4)
5523 *count = ag_count;
5524 else
5525 return false;
5527 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5528 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5529 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5531 *count = 1;
5532 new_mode = mode;
5534 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5536 *count = 2;
5537 new_mode = (mode == DCmode ? DFmode : SFmode);
5539 else
5540 return false;
5543 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5544 return false;
5546 *base_mode = new_mode;
5547 return true;
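/* Hypothetical illustration (not part of GCC): without type information a
   complex double (DCmode) is treated above as two DFmode elements, so under
   the VFP variant of the AAPCS it occupies a pair of D registers.  */
#if 0
extern double _Complex cdiv (double _Complex a, double _Complex b);
/* With the hard-float VFP PCS: a -> {d0, d1}, b -> {d2, d3}, result in {d0, d1}.  */
#endif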
5550 static bool
5551 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5552 machine_mode mode, const_tree type)
5554 int count ATTRIBUTE_UNUSED;
5555 machine_mode ag_mode ATTRIBUTE_UNUSED;
5557 if (!use_vfp_abi (pcs_variant, false))
5558 return false;
5559 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5560 &ag_mode, &count);
5563 static bool
5564 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5565 const_tree type)
5567 if (!use_vfp_abi (pcum->pcs_variant, false))
5568 return false;
5570 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5571 &pcum->aapcs_vfp_rmode,
5572 &pcum->aapcs_vfp_rcount);
5575 static bool
5576 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5577 const_tree type ATTRIBUTE_UNUSED)
5579 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5580 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5581 int regno;
5583 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5584 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5586 pcum->aapcs_vfp_reg_alloc = mask << regno;
5587 if (mode == BLKmode
5588 || (mode == TImode && ! TARGET_NEON)
5589 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5591 int i;
5592 int rcount = pcum->aapcs_vfp_rcount;
5593 int rshift = shift;
5594 machine_mode rmode = pcum->aapcs_vfp_rmode;
5595 rtx par;
5596 if (!TARGET_NEON)
5598 /* Avoid using unsupported vector modes. */
5599 if (rmode == V2SImode)
5600 rmode = DImode;
5601 else if (rmode == V4SImode)
5603 rmode = DImode;
5604 rcount *= 2;
5605 rshift /= 2;
5608 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5609 for (i = 0; i < rcount; i++)
5611 rtx tmp = gen_rtx_REG (rmode,
5612 FIRST_VFP_REGNUM + regno + i * rshift);
5613 tmp = gen_rtx_EXPR_LIST
5614 (VOIDmode, tmp,
5615 GEN_INT (i * GET_MODE_SIZE (rmode)));
5616 XVECEXP (par, 0, i) = tmp;
5619 pcum->aapcs_reg = par;
5621 else
5622 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5623 return true;
5625 return false;
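/* A minimal sketch (hypothetical, not part of GCC) of the bitmask scan used
   above.  For a DFmode element, shift == 2 (two S registers per D register);
   with rcount == 1 the mask is 0x3 and the loop looks for the first free,
   suitably aligned pair of S registers.  */
#if 0
static int
find_free_vfp_block (unsigned regs_free, int shift, int rcount)
{
  unsigned mask = (1u << (shift * rcount)) - 1;
  int regno;

  /* 16 S registers (s0-s15) are available for argument passing.  */
  for (regno = 0; regno < 16; regno += shift)
    if (((regs_free >> regno) & mask) == mask)
      return regno;    /* First suitably aligned free block.  */
  return -1;           /* No room; the argument goes on the stack.  */
}
#endif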
5628 static rtx
5629 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5630 machine_mode mode,
5631 const_tree type ATTRIBUTE_UNUSED)
5633 if (!use_vfp_abi (pcs_variant, false))
5634 return NULL;
5636 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5638 int count;
5639 machine_mode ag_mode;
5640 int i;
5641 rtx par;
5642 int shift;
5644 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5645 &ag_mode, &count);
5647 if (!TARGET_NEON)
5649 if (ag_mode == V2SImode)
5650 ag_mode = DImode;
5651 else if (ag_mode == V4SImode)
5653 ag_mode = DImode;
5654 count *= 2;
5657 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5658 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5659 for (i = 0; i < count; i++)
5661 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5662 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5663 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5664 XVECEXP (par, 0, i) = tmp;
5667 return par;
5670 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5673 static void
5674 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5675 machine_mode mode ATTRIBUTE_UNUSED,
5676 const_tree type ATTRIBUTE_UNUSED)
5678 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5679 pcum->aapcs_vfp_reg_alloc = 0;
5680 return;
5683 #define AAPCS_CP(X) \
5685 aapcs_ ## X ## _cum_init, \
5686 aapcs_ ## X ## _is_call_candidate, \
5687 aapcs_ ## X ## _allocate, \
5688 aapcs_ ## X ## _is_return_candidate, \
5689 aapcs_ ## X ## _allocate_return_reg, \
5690 aapcs_ ## X ## _advance \
5693 /* Table of co-processors that can be used to pass arguments in
5694 registers.  Ideally no argument should be a candidate for more than
5695 one co-processor table entry, but the table is processed in order
5696 and stops after the first match. If that entry then fails to put
5697 the argument into a co-processor register, the argument will go on
5698 the stack. */
5699 static struct
5701 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5702 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5704 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5705 BLKmode) is a candidate for this co-processor's registers; this
5706 function should ignore any position-dependent state in
5707 CUMULATIVE_ARGS and only use call-type dependent information. */
5708 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5710 /* Return true if the argument does get a co-processor register; it
5711 should set aapcs_reg to an RTX of the register allocated as is
5712 required for a return from FUNCTION_ARG. */
5713 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5715 /* Return true if a result of mode MODE (or type TYPE if MODE is
5716 BLKmode) can be returned in this co-processor's registers.  */
5717 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5719 /* Allocate and return an RTX element to hold the return type of a
5720 call; this routine must not fail and will only be called if
5721 is_return_candidate returned true with the same parameters. */
5722 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5724 /* Finish processing this argument and prepare to start processing
5725 the next one. */
5726 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5727 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5729 AAPCS_CP(vfp)
5732 #undef AAPCS_CP
5734 static int
5735 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5736 const_tree type)
5738 int i;
5740 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5741 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5742 return i;
5744 return -1;
5747 static int
5748 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5750 /* We aren't passed a decl, so we can't check that a call is local.
5751 However, it isn't clear that that would be a win anyway, since it
5752 might limit some tail-calling opportunities. */
5753 enum arm_pcs pcs_variant;
5755 if (fntype)
5757 const_tree fndecl = NULL_TREE;
5759 if (TREE_CODE (fntype) == FUNCTION_DECL)
5761 fndecl = fntype;
5762 fntype = TREE_TYPE (fntype);
5765 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5767 else
5768 pcs_variant = arm_pcs_default;
5770 if (pcs_variant != ARM_PCS_AAPCS)
5772 int i;
5774 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5775 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5776 TYPE_MODE (type),
5777 type))
5778 return i;
5780 return -1;
5783 static rtx
5784 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5785 const_tree fntype)
5787 /* We aren't passed a decl, so we can't check that a call is local.
5788 However, it isn't clear that that would be a win anyway, since it
5789 might limit some tail-calling opportunities. */
5790 enum arm_pcs pcs_variant;
5791 int unsignedp ATTRIBUTE_UNUSED;
5793 if (fntype)
5795 const_tree fndecl = NULL_TREE;
5797 if (TREE_CODE (fntype) == FUNCTION_DECL)
5799 fndecl = fntype;
5800 fntype = TREE_TYPE (fntype);
5803 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5805 else
5806 pcs_variant = arm_pcs_default;
5808 /* Promote integer types. */
5809 if (type && INTEGRAL_TYPE_P (type))
5810 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5812 if (pcs_variant != ARM_PCS_AAPCS)
5814 int i;
5816 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5817 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5818 type))
5819 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5820 mode, type);
5823 /* Promotes small structs returned in a register to full-word size
5824 for big-endian AAPCS. */
5825 if (type && arm_return_in_msb (type))
5827 HOST_WIDE_INT size = int_size_in_bytes (type);
5828 if (size % UNITS_PER_WORD != 0)
5830 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5831 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5835 return gen_rtx_REG (mode, R0_REGNUM);
5838 static rtx
5839 aapcs_libcall_value (machine_mode mode)
5841 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5842 && GET_MODE_SIZE (mode) <= 4)
5843 mode = SImode;
5845 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5848 /* Lay out a function argument using the AAPCS rules. The rule
5849 numbers referred to here are those in the AAPCS. */
5850 static void
5851 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5852 const_tree type, bool named)
5854 int nregs, nregs2;
5855 int ncrn;
5857 /* We only need to do this once per argument. */
5858 if (pcum->aapcs_arg_processed)
5859 return;
5861 pcum->aapcs_arg_processed = true;
5863 /* Special case: if named is false then we are handling an incoming
5864 anonymous argument which is on the stack. */
5865 if (!named)
5866 return;
5868 /* Is this a potential co-processor register candidate? */
5869 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5871 int slot = aapcs_select_call_coproc (pcum, mode, type);
5872 pcum->aapcs_cprc_slot = slot;
5874 /* We don't have to apply any of the rules from part B of the
5875 preparation phase; these are handled elsewhere in the
5876 compiler. */
5878 if (slot >= 0)
5880 /* A Co-processor register candidate goes either in its own
5881 class of registers or on the stack. */
5882 if (!pcum->aapcs_cprc_failed[slot])
5884 /* C1.cp - Try to allocate the argument to co-processor
5885 registers. */
5886 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5887 return;
5889 /* C2.cp - Put the argument on the stack and note that we
5890 can't assign any more candidates in this slot. We also
5891 need to note that we have allocated stack space, so that
5892 we won't later try to split a non-cprc candidate between
5893 core registers and the stack. */
5894 pcum->aapcs_cprc_failed[slot] = true;
5895 pcum->can_split = false;
5898 /* We didn't get a register, so this argument goes on the
5899 stack. */
5900 gcc_assert (pcum->can_split == false);
5901 return;
5905 /* C3 - For double-word aligned arguments, round the NCRN up to the
5906 next even number. */
5907 ncrn = pcum->aapcs_ncrn;
5908 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5909 ncrn++;
5911 nregs = ARM_NUM_REGS2(mode, type);
5913 /* Sigh, this test should really assert that nregs > 0, but a GCC
5914 extension allows empty structs and then gives them a size of zero; it
5915 then allows such a structure to be passed by value. For some of
5916 the code below we have to pretend that such an argument has
5917 non-zero size so that we 'locate' it correctly either in
5918 registers or on the stack. */
5919 gcc_assert (nregs >= 0);
5921 nregs2 = nregs ? nregs : 1;
5923 /* C4 - Argument fits entirely in core registers. */
5924 if (ncrn + nregs2 <= NUM_ARG_REGS)
5926 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5927 pcum->aapcs_next_ncrn = ncrn + nregs;
5928 return;
5931 /* C5 - Some core registers left and there are no arguments already
5932 on the stack: split this argument between the remaining core
5933 registers and the stack. */
5934 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5936 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5937 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5938 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5939 return;
5942 /* C6 - NCRN is set to 4. */
5943 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5945 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
5946 return;
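/* A hypothetical worked example (not part of GCC) of rules C3-C5 above.  */
#if 0
struct s8 { int x, y; };             /* 8 bytes, word aligned.  */
extern void f (int a, long long b);  /* a -> r0; C3 rounds the NCRN up to 2,
                                        so b -> {r2, r3}.  */
extern void g (int a, int b, int c, struct s8 d);
                                     /* a, b, c -> r0-r2; by C5, d is split
                                        between r3 and one word of stack.  */
#endif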
5949 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5950 for a call to a function whose data type is FNTYPE.
5951 For a library call, FNTYPE is NULL. */
5952 void
5953 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5954 rtx libname,
5955 tree fndecl ATTRIBUTE_UNUSED)
5957 /* Long call handling. */
5958 if (fntype)
5959 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5960 else
5961 pcum->pcs_variant = arm_pcs_default;
5963 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5965 if (arm_libcall_uses_aapcs_base (libname))
5966 pcum->pcs_variant = ARM_PCS_AAPCS;
5968 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5969 pcum->aapcs_reg = NULL_RTX;
5970 pcum->aapcs_partial = 0;
5971 pcum->aapcs_arg_processed = false;
5972 pcum->aapcs_cprc_slot = -1;
5973 pcum->can_split = true;
5975 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5977 int i;
5979 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5981 pcum->aapcs_cprc_failed[i] = false;
5982 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5985 return;
5988 /* Legacy ABIs */
5990 /* On the ARM, the offset starts at 0. */
5991 pcum->nregs = 0;
5992 pcum->iwmmxt_nregs = 0;
5993 pcum->can_split = true;
5995 /* Varargs vectors are treated the same as long long.
5996 named_count avoids having to change the way arm handles 'named'.  */
5997 pcum->named_count = 0;
5998 pcum->nargs = 0;
6000 if (TARGET_REALLY_IWMMXT && fntype)
6002 tree fn_arg;
6004 for (fn_arg = TYPE_ARG_TYPES (fntype);
6005 fn_arg;
6006 fn_arg = TREE_CHAIN (fn_arg))
6007 pcum->named_count += 1;
6009 if (! pcum->named_count)
6010 pcum->named_count = INT_MAX;
6014 /* Return true if mode/type need doubleword alignment. */
6015 static bool
6016 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6018 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6019 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
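/* For illustration (hypothetical, not part of GCC): both a 64-bit scalar and
   a type whose user-specified alignment exceeds PARM_BOUNDARY (32 bits on ARM)
   satisfy the test above and hence want an even register pair.  */
#if 0
typedef long long needs_dw_align_t;                              /* 64-bit mode alignment.  */
typedef struct { int i; } dw_pair_t __attribute__ ((aligned (8)));  /* 8-byte type alignment.  */
#endif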
6023 /* Determine where to put an argument to a function.
6024 Value is zero to push the argument on the stack,
6025 or a hard register in which to store the argument.
6027 MODE is the argument's machine mode.
6028 TYPE is the data type of the argument (as a tree).
6029 This is null for libcalls where that information may
6030 not be available.
6031 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6032 the preceding args and about the function being called.
6033 NAMED is nonzero if this argument is a named parameter
6034 (otherwise it is an extra parameter matching an ellipsis).
6036 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6037 other arguments are passed on the stack. If (NAMED == 0) (which happens
6038 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6039 defined), say it is passed on the stack (function_prologue will
6040 indeed make it get passed on the stack if necessary).  */
6042 static rtx
6043 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6044 const_tree type, bool named)
6046 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6047 int nregs;
6049 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6050 a call insn (op3 of a call_value insn). */
6051 if (mode == VOIDmode)
6052 return const0_rtx;
6054 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6056 aapcs_layout_arg (pcum, mode, type, named);
6057 return pcum->aapcs_reg;
6060 /* Varargs vectors are treated the same as long long.
6061 named_count avoids having to change the way arm handles 'named'.  */
6062 if (TARGET_IWMMXT_ABI
6063 && arm_vector_mode_supported_p (mode)
6064 && pcum->named_count > pcum->nargs + 1)
6066 if (pcum->iwmmxt_nregs <= 9)
6067 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6068 else
6070 pcum->can_split = false;
6071 return NULL_RTX;
6075 /* Put doubleword aligned quantities in even register pairs. */
6076 if (pcum->nregs & 1
6077 && ARM_DOUBLEWORD_ALIGN
6078 && arm_needs_doubleword_align (mode, type))
6079 pcum->nregs++;
6081 /* Only allow splitting an arg between regs and memory if all preceding
6082 args were allocated to regs. For args passed by reference we only count
6083 the reference pointer. */
6084 if (pcum->can_split)
6085 nregs = 1;
6086 else
6087 nregs = ARM_NUM_REGS2 (mode, type);
6089 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6090 return NULL_RTX;
6092 return gen_rtx_REG (mode, pcum->nregs);
6095 static unsigned int
6096 arm_function_arg_boundary (machine_mode mode, const_tree type)
6098 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6099 ? DOUBLEWORD_ALIGNMENT
6100 : PARM_BOUNDARY);
6103 static int
6104 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6105 tree type, bool named)
6107 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6108 int nregs = pcum->nregs;
6110 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6112 aapcs_layout_arg (pcum, mode, type, named);
6113 return pcum->aapcs_partial;
6116 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6117 return 0;
6119 if (NUM_ARG_REGS > nregs
6120 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6121 && pcum->can_split)
6122 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6124 return 0;
6127 /* Update the data in PCUM to advance over an argument
6128 of mode MODE and data type TYPE.
6129 (TYPE is null for libcalls where that information may not be available.) */
6131 static void
6132 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6133 const_tree type, bool named)
6135 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6137 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6139 aapcs_layout_arg (pcum, mode, type, named);
6141 if (pcum->aapcs_cprc_slot >= 0)
6143 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6144 type);
6145 pcum->aapcs_cprc_slot = -1;
6148 /* Generic stuff. */
6149 pcum->aapcs_arg_processed = false;
6150 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6151 pcum->aapcs_reg = NULL_RTX;
6152 pcum->aapcs_partial = 0;
6154 else
6156 pcum->nargs += 1;
6157 if (arm_vector_mode_supported_p (mode)
6158 && pcum->named_count > pcum->nargs
6159 && TARGET_IWMMXT_ABI)
6160 pcum->iwmmxt_nregs += 1;
6161 else
6162 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6166 /* Variable sized types are passed by reference. This is a GCC
6167 extension to the ARM ABI. */
6169 static bool
6170 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6171 machine_mode mode ATTRIBUTE_UNUSED,
6172 const_tree type, bool named ATTRIBUTE_UNUSED)
6174 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6177 /* Encode the current state of the #pragma [no_]long_calls. */
6178 typedef enum
6180 OFF, /* No #pragma [no_]long_calls is in effect. */
6181 LONG, /* #pragma long_calls is in effect. */
6182 SHORT /* #pragma no_long_calls is in effect. */
6183 } arm_pragma_enum;
6185 static arm_pragma_enum arm_pragma_long_calls = OFF;
6187 void
6188 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6190 arm_pragma_long_calls = LONG;
6193 void
6194 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6196 arm_pragma_long_calls = SHORT;
6199 void
6200 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6202 arm_pragma_long_calls = OFF;
6205 /* Handle an attribute requiring a FUNCTION_DECL;
6206 arguments as in struct attribute_spec.handler. */
6207 static tree
6208 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6209 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6211 if (TREE_CODE (*node) != FUNCTION_DECL)
6213 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6214 name);
6215 *no_add_attrs = true;
6218 return NULL_TREE;
6221 /* Handle an "interrupt" or "isr" attribute;
6222 arguments as in struct attribute_spec.handler. */
6223 static tree
6224 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6225 bool *no_add_attrs)
6227 if (DECL_P (*node))
6229 if (TREE_CODE (*node) != FUNCTION_DECL)
6231 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6232 name);
6233 *no_add_attrs = true;
6235 /* FIXME: the argument if any is checked for type attributes;
6236 should it be checked for decl ones? */
6238 else
6240 if (TREE_CODE (*node) == FUNCTION_TYPE
6241 || TREE_CODE (*node) == METHOD_TYPE)
6243 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6245 warning (OPT_Wattributes, "%qE attribute ignored",
6246 name);
6247 *no_add_attrs = true;
6250 else if (TREE_CODE (*node) == POINTER_TYPE
6251 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6252 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6253 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6255 *node = build_variant_type_copy (*node);
6256 TREE_TYPE (*node) = build_type_attribute_variant
6257 (TREE_TYPE (*node),
6258 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6259 *no_add_attrs = true;
6261 else
6263 /* Possibly pass this attribute on from the type to a decl. */
6264 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6265 | (int) ATTR_FLAG_FUNCTION_NEXT
6266 | (int) ATTR_FLAG_ARRAY_NEXT))
6268 *no_add_attrs = true;
6269 return tree_cons (name, args, NULL_TREE);
6271 else
6273 warning (OPT_Wattributes, "%qE attribute ignored",
6274 name);
6279 return NULL_TREE;
6282 /* Handle a "pcs" attribute; arguments as in struct
6283 attribute_spec.handler. */
6284 static tree
6285 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6286 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6288 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6290 warning (OPT_Wattributes, "%qE attribute ignored", name);
6291 *no_add_attrs = true;
6293 return NULL_TREE;
6296 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6297 /* Handle the "notshared" attribute. This attribute is another way of
6298 requesting hidden visibility. ARM's compiler supports
6299 "__declspec(notshared)"; we support the same thing via an
6300 attribute. */
6302 static tree
6303 arm_handle_notshared_attribute (tree *node,
6304 tree name ATTRIBUTE_UNUSED,
6305 tree args ATTRIBUTE_UNUSED,
6306 int flags ATTRIBUTE_UNUSED,
6307 bool *no_add_attrs)
6309 tree decl = TYPE_NAME (*node);
6311 if (decl)
6313 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6314 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6315 *no_add_attrs = false;
6317 return NULL_TREE;
6319 #endif
6321 /* Return 0 if the attributes for two types are incompatible, 1 if they
6322 are compatible, and 2 if they are nearly compatible (which causes a
6323 warning to be generated). */
6324 static int
6325 arm_comp_type_attributes (const_tree type1, const_tree type2)
6327 int l1, l2, s1, s2;
6329 /* Check for mismatch of non-default calling convention. */
6330 if (TREE_CODE (type1) != FUNCTION_TYPE)
6331 return 1;
6333 /* Check for mismatched call attributes. */
6334 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6335 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6336 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6337 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6339 /* Only bother to check if an attribute is defined. */
6340 if (l1 | l2 | s1 | s2)
6342 /* If one type has an attribute, the other must have the same attribute. */
6343 if ((l1 != l2) || (s1 != s2))
6344 return 0;
6346 /* Disallow mixed attributes. */
6347 if ((l1 & s2) || (l2 & s1))
6348 return 0;
6351 /* Check for mismatched ISR attribute. */
6352 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6353 if (! l1)
6354 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6355 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6356 if (! l2)
6357 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6358 if (l1 != l2)
6359 return 0;
6361 return 1;
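/* Hypothetical illustration (not part of GCC): two function types that differ
   only in a "long_call" attribute compare as incompatible here.  */
#if 0
extern void f1 (void) __attribute__ ((long_call));
extern void f2 (void);
/* TYPE_ATTRIBUTES of f1's type carries "long_call" while f2's does not,
   so l1 != l2 above and the types are reported as incompatible (0).  */
#endif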
6364 /* Assigns default attributes to newly defined type. This is used to
6365 set short_call/long_call attributes for function types of
6366 functions defined inside corresponding #pragma scopes. */
6367 static void
6368 arm_set_default_type_attributes (tree type)
6370 /* Add __attribute__ ((long_call)) to all functions, when
6371 inside #pragma long_calls or __attribute__ ((short_call)),
6372 when inside #pragma no_long_calls. */
6373 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6375 tree type_attr_list, attr_name;
6376 type_attr_list = TYPE_ATTRIBUTES (type);
6378 if (arm_pragma_long_calls == LONG)
6379 attr_name = get_identifier ("long_call");
6380 else if (arm_pragma_long_calls == SHORT)
6381 attr_name = get_identifier ("short_call");
6382 else
6383 return;
6385 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6386 TYPE_ATTRIBUTES (type) = type_attr_list;
6390 /* Return true if DECL is known to be linked into section SECTION. */
6392 static bool
6393 arm_function_in_section_p (tree decl, section *section)
6395 /* We can only be certain about functions defined in the same
6396 compilation unit. */
6397 if (!TREE_STATIC (decl))
6398 return false;
6400 /* Make sure that SYMBOL always binds to the definition in this
6401 compilation unit. */
6402 if (!targetm.binds_local_p (decl))
6403 return false;
6405 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6406 if (!DECL_SECTION_NAME (decl))
6408 /* Make sure that we will not create a unique section for DECL. */
6409 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6410 return false;
6413 return function_section (decl) == section;
6416 /* Return nonzero if a 32-bit "long_call" should be generated for
6417 a call from the current function to DECL. We generate a long_call
6418 if the function:
6420 a. has an __attribute__ ((long_call))
6421 or b. is within the scope of a #pragma long_calls
6422 or c. the -mlong-calls command line switch has been specified
6424 However we do not generate a long call if the function:
6426 d. has an __attribute__ ((short_call))
6427 or e. is inside the scope of a #pragma no_long_calls
6428 or f. is defined in the same section as the current function. */
6430 bool
6431 arm_is_long_call_p (tree decl)
6433 tree attrs;
6435 if (!decl)
6436 return TARGET_LONG_CALLS;
6438 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6439 if (lookup_attribute ("short_call", attrs))
6440 return false;
6442 /* For "f", be conservative, and only cater for cases in which the
6443 whole of the current function is placed in the same section. */
6444 if (!flag_reorder_blocks_and_partition
6445 && TREE_CODE (decl) == FUNCTION_DECL
6446 && arm_function_in_section_p (decl, current_function_section ()))
6447 return false;
6449 if (lookup_attribute ("long_call", attrs))
6450 return true;
6452 return TARGET_LONG_CALLS;
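/* Hypothetical usage examples (not part of GCC) of rules a-f above.  */
#if 0
extern void far_func (void)  __attribute__ ((long_call));   /* Rule a: always a long call.  */
extern void near_func (void) __attribute__ ((short_call));  /* Rule d: never a long call.  */
/* With -mlong-calls (rule c) other external calls are long calls unless
   rule e or rule f applies.  */
#endif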
6455 /* Return nonzero if it is ok to make a tail-call to DECL. */
6456 static bool
6457 arm_function_ok_for_sibcall (tree decl, tree exp)
6459 unsigned long func_type;
6461 if (cfun->machine->sibcall_blocked)
6462 return false;
6464 /* Never tailcall something if we are generating code for Thumb-1. */
6465 if (TARGET_THUMB1)
6466 return false;
6468 /* The PIC register is live on entry to VxWorks PLT entries, so we
6469 must make the call before restoring the PIC register. */
6470 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6471 return false;
6473 /* If we are interworking and the function is not declared static
6474 then we can't tail-call it unless we know that it exists in this
6475 compilation unit (since it might be a Thumb routine). */
6476 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6477 && !TREE_ASM_WRITTEN (decl))
6478 return false;
6480 func_type = arm_current_func_type ();
6481 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6482 if (IS_INTERRUPT (func_type))
6483 return false;
6485 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6487 /* Check that the return value locations are the same. For
6488 example that we aren't returning a value from the sibling in
6489 a VFP register but then need to transfer it to a core
6490 register. */
6491 rtx a, b;
6493 a = arm_function_value (TREE_TYPE (exp), decl, false);
6494 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6495 cfun->decl, false);
6496 if (!rtx_equal_p (a, b))
6497 return false;
6500 /* Never tailcall if function may be called with a misaligned SP. */
6501 if (IS_STACKALIGN (func_type))
6502 return false;
6504 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6505 references should become a NOP. Don't convert such calls into
6506 sibling calls. */
6507 if (TARGET_AAPCS_BASED
6508 && arm_abi == ARM_ABI_AAPCS
6509 && decl
6510 && DECL_WEAK (decl))
6511 return false;
6513 /* Everything else is ok. */
6514 return true;
6518 /* Addressing mode support functions. */
6520 /* Return nonzero if X is a legitimate immediate operand when compiling
6521 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6523 legitimate_pic_operand_p (rtx x)
6525 if (GET_CODE (x) == SYMBOL_REF
6526 || (GET_CODE (x) == CONST
6527 && GET_CODE (XEXP (x, 0)) == PLUS
6528 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6529 return 0;
6531 return 1;
6534 /* Record that the current function needs a PIC register. Initialize
6535 cfun->machine->pic_reg if we have not already done so. */
6537 static void
6538 require_pic_register (void)
6540 /* A lot of the logic here is made obscure by the fact that this
6541 routine gets called as part of the rtx cost estimation process.
6542 We don't want those calls to affect any assumptions about the real
6543 function; and further, we can't call entry_of_function() until we
6544 start the real expansion process. */
6545 if (!crtl->uses_pic_offset_table)
6547 gcc_assert (can_create_pseudo_p ());
6548 if (arm_pic_register != INVALID_REGNUM
6549 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6551 if (!cfun->machine->pic_reg)
6552 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6554 /* Play games to avoid marking the function as needing pic
6555 if we are being called as part of the cost-estimation
6556 process. */
6557 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6558 crtl->uses_pic_offset_table = 1;
6560 else
6562 rtx_insn *seq, *insn;
6564 if (!cfun->machine->pic_reg)
6565 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6567 /* Play games to avoid marking the function as needing pic
6568 if we are being called as part of the cost-estimation
6569 process. */
6570 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6572 crtl->uses_pic_offset_table = 1;
6573 start_sequence ();
6575 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6576 && arm_pic_register > LAST_LO_REGNUM)
6577 emit_move_insn (cfun->machine->pic_reg,
6578 gen_rtx_REG (Pmode, arm_pic_register));
6579 else
6580 arm_load_pic_register (0UL);
6582 seq = get_insns ();
6583 end_sequence ();
6585 for (insn = seq; insn; insn = NEXT_INSN (insn))
6586 if (INSN_P (insn))
6587 INSN_LOCATION (insn) = prologue_location;
6589 /* We can be called during expansion of PHI nodes, where
6590 we can't yet emit instructions directly in the final
6591 insn stream. Queue the insns on the entry edge, they will
6592 be committed after everything else is expanded. */
6593 insert_insn_on_edge (seq,
6594 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6601 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6603 if (GET_CODE (orig) == SYMBOL_REF
6604 || GET_CODE (orig) == LABEL_REF)
6606 rtx insn;
6608 if (reg == 0)
6610 gcc_assert (can_create_pseudo_p ());
6611 reg = gen_reg_rtx (Pmode);
6614 /* VxWorks does not impose a fixed gap between segments; the run-time
6615 gap can be different from the object-file gap. We therefore can't
6616 use GOTOFF unless we are absolutely sure that the symbol is in the
6617 same segment as the GOT. Unfortunately, the flexibility of linker
6618 scripts means that we can't be sure of that in general, so assume
6619 that GOTOFF is never valid on VxWorks. */
6620 if ((GET_CODE (orig) == LABEL_REF
6621 || (GET_CODE (orig) == SYMBOL_REF &&
6622 SYMBOL_REF_LOCAL_P (orig)))
6623 && NEED_GOT_RELOC
6624 && arm_pic_data_is_text_relative)
6625 insn = arm_pic_static_addr (orig, reg);
6626 else
6628 rtx pat;
6629 rtx mem;
6631 /* If this function doesn't have a pic register, create one now. */
6632 require_pic_register ();
6634 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6636 /* Make the MEM as close to a constant as possible. */
6637 mem = SET_SRC (pat);
6638 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6639 MEM_READONLY_P (mem) = 1;
6640 MEM_NOTRAP_P (mem) = 1;
6642 insn = emit_insn (pat);
6645 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6646 by the loop optimizer.  */
6647 set_unique_reg_note (insn, REG_EQUAL, orig);
6649 return reg;
6651 else if (GET_CODE (orig) == CONST)
6653 rtx base, offset;
6655 if (GET_CODE (XEXP (orig, 0)) == PLUS
6656 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6657 return orig;
6659 /* Handle the case where we have: const (UNSPEC_TLS). */
6660 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6661 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6662 return orig;
6664 /* Handle the case where we have:
6665 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6666 CONST_INT. */
6667 if (GET_CODE (XEXP (orig, 0)) == PLUS
6668 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6669 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6671 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6672 return orig;
6675 if (reg == 0)
6677 gcc_assert (can_create_pseudo_p ());
6678 reg = gen_reg_rtx (Pmode);
6681 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6683 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6684 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6685 base == reg ? 0 : reg);
6687 if (CONST_INT_P (offset))
6689 /* The base register doesn't really matter; we only want to
6690 test the index for the appropriate mode. */
6691 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6693 gcc_assert (can_create_pseudo_p ());
6694 offset = force_reg (Pmode, offset);
6697 if (CONST_INT_P (offset))
6698 return plus_constant (Pmode, base, INTVAL (offset));
6701 if (GET_MODE_SIZE (mode) > 4
6702 && (GET_MODE_CLASS (mode) == MODE_INT
6703 || TARGET_SOFT_FLOAT))
6705 emit_insn (gen_addsi3 (reg, base, offset));
6706 return reg;
6709 return gen_rtx_PLUS (Pmode, base, offset);
6712 return orig;
6716 /* Find a spare register to use during the prolog of a function. */
6718 static int
6719 thumb_find_work_register (unsigned long pushed_regs_mask)
6721 int reg;
6723 /* Check the argument registers first as these are call-used. The
6724 register allocation order means that sometimes r3 might be used
6725 but earlier argument registers might not, so check them all. */
6726 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6727 if (!df_regs_ever_live_p (reg))
6728 return reg;
6730 /* Before going on to check the call-saved registers we can try a couple
6731 more ways of deducing that r3 is available. The first is when we are
6732 pushing anonymous arguments onto the stack and we have less than 4
6733 registers worth of fixed arguments(*). In this case r3 will be part of
6734 the variable argument list and so we can be sure that it will be
6735 pushed right at the start of the function. Hence it will be available
6736 for the rest of the prologue.
6737 (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
6738 if (cfun->machine->uses_anonymous_args
6739 && crtl->args.pretend_args_size > 0)
6740 return LAST_ARG_REGNUM;
6742 /* The other case is when we have fixed arguments but less than 4 registers
6743 worth. In this case r3 might be used in the body of the function, but
6744 it is not being used to convey an argument into the function. In theory
6745 we could just check crtl->args.size to see how many bytes are
6746 being passed in argument registers, but it seems that it is unreliable.
6747 Sometimes it will have the value 0 when in fact arguments are being
6748 passed.  (See testcase execute/20021111-1.c for an example.)  So we
6749 check the args_info.nregs field as well.  The problem with this field is
6750 that it makes no allowances for arguments that are passed to the
6751 function but which are not used. Hence we could miss an opportunity
6752 when a function has an unused argument in r3. But it is better to be
6753 safe than to be sorry. */
6754 if (! cfun->machine->uses_anonymous_args
6755 && crtl->args.size >= 0
6756 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6757 && (TARGET_AAPCS_BASED
6758 ? crtl->args.info.aapcs_ncrn < 4
6759 : crtl->args.info.nregs < 4))
6760 return LAST_ARG_REGNUM;
6762 /* Otherwise look for a call-saved register that is going to be pushed. */
6763 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6764 if (pushed_regs_mask & (1 << reg))
6765 return reg;
6767 if (TARGET_THUMB2)
6769 /* Thumb-2 can use high regs. */
6770 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6771 if (pushed_regs_mask & (1 << reg))
6772 return reg;
6774 /* Something went wrong - thumb_compute_save_reg_mask()
6775 should have arranged for a suitable register to be pushed. */
6776 gcc_unreachable ();
6779 static GTY(()) int pic_labelno;
6781 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6782 low register. */
6784 void
6785 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6787 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6789 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6790 return;
6792 gcc_assert (flag_pic);
6794 pic_reg = cfun->machine->pic_reg;
6795 if (TARGET_VXWORKS_RTP)
6797 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6798 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6799 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6801 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6803 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6804 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6806 else
6808 /* We use an UNSPEC rather than a LABEL_REF because this label
6809 never appears in the code stream. */
6811 labelno = GEN_INT (pic_labelno++);
6812 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6813 l1 = gen_rtx_CONST (VOIDmode, l1);
6815 /* On the ARM the PC register contains 'dot + 8' at the time of the
6816 addition, on the Thumb it is 'dot + 4'. */
6817 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6818 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6819 UNSPEC_GOTSYM_OFF);
6820 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6822 if (TARGET_32BIT)
6824 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6826 else /* TARGET_THUMB1 */
6828 if (arm_pic_register != INVALID_REGNUM
6829 && REGNO (pic_reg) > LAST_LO_REGNUM)
6831 /* We will have pushed the pic register, so we should always be
6832 able to find a work register. */
6833 pic_tmp = gen_rtx_REG (SImode,
6834 thumb_find_work_register (saved_regs));
6835 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6836 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6837 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6839 else if (arm_pic_register != INVALID_REGNUM
6840 && arm_pic_register > LAST_LO_REGNUM
6841 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6843 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6844 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6845 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6847 else
6848 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6852 /* Need to emit this whether or not we obey regdecls,
6853 since setjmp/longjmp can cause life info to screw up. */
6854 emit_use (pic_reg);
6857 /* Generate code to load the address of a static var when flag_pic is set. */
6858 static rtx
6859 arm_pic_static_addr (rtx orig, rtx reg)
6861 rtx l1, labelno, offset_rtx, insn;
6863 gcc_assert (flag_pic);
6865 /* We use an UNSPEC rather than a LABEL_REF because this label
6866 never appears in the code stream. */
6867 labelno = GEN_INT (pic_labelno++);
6868 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6869 l1 = gen_rtx_CONST (VOIDmode, l1);
6871 /* On the ARM the PC register contains 'dot + 8' at the time of the
6872 addition, on the Thumb it is 'dot + 4'. */
6873 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6874 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6875 UNSPEC_SYMBOL_OFFSET);
6876 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6878 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6879 return insn;
6882 /* Return nonzero if X is valid as an ARM state addressing register. */
6883 static int
6884 arm_address_register_rtx_p (rtx x, int strict_p)
6886 int regno;
6888 if (!REG_P (x))
6889 return 0;
6891 regno = REGNO (x);
6893 if (strict_p)
6894 return ARM_REGNO_OK_FOR_BASE_P (regno);
6896 return (regno <= LAST_ARM_REGNUM
6897 || regno >= FIRST_PSEUDO_REGISTER
6898 || regno == FRAME_POINTER_REGNUM
6899 || regno == ARG_POINTER_REGNUM);
6902 /* Return TRUE if this rtx is the difference of a symbol and a label,
6903 and will reduce to a PC-relative relocation in the object file.
6904 Expressions like this can be left alone when generating PIC, rather
6905 than forced through the GOT. */
6906 static int
6907 pcrel_constant_p (rtx x)
6909 if (GET_CODE (x) == MINUS)
6910 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6912 return FALSE;
6915 /* Return true if X will surely end up in an index register after the
6916 next splitting pass.  */
6917 static bool
6918 will_be_in_index_register (const_rtx x)
6920 /* arm.md: calculate_pic_address will split this into a register. */
6921 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6924 /* Return nonzero if X is a valid ARM state address operand. */
6926 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6927 int strict_p)
6929 bool use_ldrd;
6930 enum rtx_code code = GET_CODE (x);
6932 if (arm_address_register_rtx_p (x, strict_p))
6933 return 1;
6935 use_ldrd = (TARGET_LDRD
6936 && (mode == DImode
6937 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6939 if (code == POST_INC || code == PRE_DEC
6940 || ((code == PRE_INC || code == POST_DEC)
6941 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6942 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6944 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6945 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6946 && GET_CODE (XEXP (x, 1)) == PLUS
6947 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6949 rtx addend = XEXP (XEXP (x, 1), 1);
6951 /* Don't allow ldrd post-increment by register because it's hard
6952 to fix up invalid register choices.  */
6953 if (use_ldrd
6954 && GET_CODE (x) == POST_MODIFY
6955 && REG_P (addend))
6956 return 0;
6958 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6959 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6962 /* After reload constants split into minipools will have addresses
6963 from a LABEL_REF. */
6964 else if (reload_completed
6965 && (code == LABEL_REF
6966 || (code == CONST
6967 && GET_CODE (XEXP (x, 0)) == PLUS
6968 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6969 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6970 return 1;
6972 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6973 return 0;
6975 else if (code == PLUS)
6977 rtx xop0 = XEXP (x, 0);
6978 rtx xop1 = XEXP (x, 1);
6980 return ((arm_address_register_rtx_p (xop0, strict_p)
6981 && ((CONST_INT_P (xop1)
6982 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6983 || (!strict_p && will_be_in_index_register (xop1))))
6984 || (arm_address_register_rtx_p (xop1, strict_p)
6985 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6988 #if 0
6989 /* Reload currently can't handle MINUS, so disable this for now */
6990 else if (GET_CODE (x) == MINUS)
6992 rtx xop0 = XEXP (x, 0);
6993 rtx xop1 = XEXP (x, 1);
6995 return (arm_address_register_rtx_p (xop0, strict_p)
6996 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6998 #endif
7000 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7001 && code == SYMBOL_REF
7002 && CONSTANT_POOL_ADDRESS_P (x)
7003 && ! (flag_pic
7004 && symbol_mentioned_p (get_pool_constant (x))
7005 && ! pcrel_constant_p (get_pool_constant (x))))
7006 return 1;
7008 return 0;
7011 /* Return nonzero if X is a valid Thumb-2 address operand. */
7012 static int
7013 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7015 bool use_ldrd;
7016 enum rtx_code code = GET_CODE (x);
7018 if (arm_address_register_rtx_p (x, strict_p))
7019 return 1;
7021 use_ldrd = (TARGET_LDRD
7022 && (mode == DImode
7023 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7025 if (code == POST_INC || code == PRE_DEC
7026 || ((code == PRE_INC || code == POST_DEC)
7027 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7028 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7030 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7031 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7032 && GET_CODE (XEXP (x, 1)) == PLUS
7033 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7035 /* Thumb-2 only has autoincrement by constant. */
7036 rtx addend = XEXP (XEXP (x, 1), 1);
7037 HOST_WIDE_INT offset;
7039 if (!CONST_INT_P (addend))
7040 return 0;
7042 offset = INTVAL(addend);
7043 if (GET_MODE_SIZE (mode) <= 4)
7044 return (offset > -256 && offset < 256);
7046 return (use_ldrd && offset > -1024 && offset < 1024
7047 && (offset & 3) == 0);
7050 /* After reload constants split into minipools will have addresses
7051 from a LABEL_REF. */
7052 else if (reload_completed
7053 && (code == LABEL_REF
7054 || (code == CONST
7055 && GET_CODE (XEXP (x, 0)) == PLUS
7056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7057 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7058 return 1;
7060 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7061 return 0;
7063 else if (code == PLUS)
7065 rtx xop0 = XEXP (x, 0);
7066 rtx xop1 = XEXP (x, 1);
7068 return ((arm_address_register_rtx_p (xop0, strict_p)
7069 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7070 || (!strict_p && will_be_in_index_register (xop1))))
7071 || (arm_address_register_rtx_p (xop1, strict_p)
7072 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7075 /* Normally we can assign constant values to target registers without
7076 the help of the constant pool.  But there are cases where we have to
7077 use the constant pool, for example:
7078 1) assigning a label to a register;
7079 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7081 A constant pool access of the form:
7082 (set (reg r0) (mem (symbol_ref (".LC0"))))
7083 will cause the use of the literal pool (later in function arm_reorg).
7084 So here we mark such a format as invalid; the compiler will then
7085 adjust it into:
7086 (set (reg r0) (symbol_ref (".LC0")))
7087 (set (reg r0) (mem (reg r0))).
7088 No extra register is required, and (mem (reg r0)) won't cause the use
7089 of literal pools. */
7090 else if (arm_disable_literal_pool && code == SYMBOL_REF
7091 && CONSTANT_POOL_ADDRESS_P (x))
7092 return 0;
7094 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7095 && code == SYMBOL_REF
7096 && CONSTANT_POOL_ADDRESS_P (x)
7097 && ! (flag_pic
7098 && symbol_mentioned_p (get_pool_constant (x))
7099 && ! pcrel_constant_p (get_pool_constant (x))))
7100 return 1;
7102 return 0;
7105 /* Return nonzero if INDEX is valid for an address index operand in
7106 ARM state. */
7107 static int
7108 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7109 int strict_p)
7111 HOST_WIDE_INT range;
7112 enum rtx_code code = GET_CODE (index);
7114 /* Standard coprocessor addressing modes. */
7115 if (TARGET_HARD_FLOAT
7116 && TARGET_VFP
7117 && (mode == SFmode || mode == DFmode))
7118 return (code == CONST_INT && INTVAL (index) < 1024
7119 && INTVAL (index) > -1024
7120 && (INTVAL (index) & 3) == 0);
7122 /* For quad modes, we restrict the constant offset to be slightly less
7123 than what the instruction format permits. We do this because for
7124 quad mode moves, we will actually decompose them into two separate
7125 double-mode reads or writes. INDEX must therefore be a valid
7126 (double-mode) offset and so should INDEX+8. */
7127 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7128 return (code == CONST_INT
7129 && INTVAL (index) < 1016
7130 && INTVAL (index) > -1024
7131 && (INTVAL (index) & 3) == 0);
7133 /* We have no such constraint on double mode offsets, so we permit the
7134 full range of the instruction format. */
7135 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7136 return (code == CONST_INT
7137 && INTVAL (index) < 1024
7138 && INTVAL (index) > -1024
7139 && (INTVAL (index) & 3) == 0);
7141 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7142 return (code == CONST_INT
7143 && INTVAL (index) < 1024
7144 && INTVAL (index) > -1024
7145 && (INTVAL (index) & 3) == 0);
7147 if (arm_address_register_rtx_p (index, strict_p)
7148 && (GET_MODE_SIZE (mode) <= 4))
7149 return 1;
7151 if (mode == DImode || mode == DFmode)
7153 if (code == CONST_INT)
7155 HOST_WIDE_INT val = INTVAL (index);
7157 if (TARGET_LDRD)
7158 return val > -256 && val < 256;
7159 else
7160 return val > -4096 && val < 4092;
7163 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7166 if (GET_MODE_SIZE (mode) <= 4
7167 && ! (arm_arch4
7168 && (mode == HImode
7169 || mode == HFmode
7170 || (mode == QImode && outer == SIGN_EXTEND))))
7172 if (code == MULT)
7174 rtx xiop0 = XEXP (index, 0);
7175 rtx xiop1 = XEXP (index, 1);
7177 return ((arm_address_register_rtx_p (xiop0, strict_p)
7178 && power_of_two_operand (xiop1, SImode))
7179 || (arm_address_register_rtx_p (xiop1, strict_p)
7180 && power_of_two_operand (xiop0, SImode)));
7182 else if (code == LSHIFTRT || code == ASHIFTRT
7183 || code == ASHIFT || code == ROTATERT)
7185 rtx op = XEXP (index, 1);
7187 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7188 && CONST_INT_P (op)
7189 && INTVAL (op) > 0
7190 && INTVAL (op) <= 31);
7194 /* For ARM v4 we may be doing a sign-extend operation during the
7195 load. */
7196 if (arm_arch4)
7198 if (mode == HImode
7199 || mode == HFmode
7200 || (outer == SIGN_EXTEND && mode == QImode))
7201 range = 256;
7202 else
7203 range = 4096;
7205 else
7206 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7208 return (code == CONST_INT
7209 && INTVAL (index) < range
7210 && INTVAL (index) > -range);
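/* Roughly speaking (a hypothetical note, not part of GCC), the index forms
   accepted above correspond to ARM addressing modes such as [rN, #imm],
   [rN, rM] and [rN, rM, lsl #s].  */
#if 0
int get (int *p, int i) { return p[i]; }   /* May compile to: ldr r0, [r0, r1, lsl #2].  */
#endif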
7213 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7214 index operand. i.e. 1, 2, 4 or 8. */
7215 static bool
7216 thumb2_index_mul_operand (rtx op)
7218 HOST_WIDE_INT val;
7220 if (!CONST_INT_P (op))
7221 return false;
7223 val = INTVAL(op);
7224 return (val == 1 || val == 2 || val == 4 || val == 8);
7227 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7228 static int
7229 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7231 enum rtx_code code = GET_CODE (index);
7233 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7234 /* Standard coprocessor addressing modes. */
7235 if (TARGET_HARD_FLOAT
7236 && TARGET_VFP
7237 && (mode == SFmode || mode == DFmode))
7238 return (code == CONST_INT && INTVAL (index) < 1024
7239 /* Thumb-2 allows only a > -256 index range for its core register
7240 loads/stores.  Since we allow SF/DF in core registers, we have
7241 to use the intersection between -256~4096 (core) and -1024~1024
7242 (coprocessor). */
7243 && INTVAL (index) > -256
7244 && (INTVAL (index) & 3) == 0);
7246 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7248 /* For DImode assume values will usually live in core regs
7249 and only allow LDRD addressing modes. */
7250 if (!TARGET_LDRD || mode != DImode)
7251 return (code == CONST_INT
7252 && INTVAL (index) < 1024
7253 && INTVAL (index) > -1024
7254 && (INTVAL (index) & 3) == 0);
7257 /* For quad modes, we restrict the constant offset to be slightly less
7258 than what the instruction format permits. We do this because for
7259 quad mode moves, we will actually decompose them into two separate
7260 double-mode reads or writes. INDEX must therefore be a valid
7261 (double-mode) offset and so should INDEX+8. */
7262 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7263 return (code == CONST_INT
7264 && INTVAL (index) < 1016
7265 && INTVAL (index) > -1024
7266 && (INTVAL (index) & 3) == 0);
7268 /* We have no such constraint on double mode offsets, so we permit the
7269 full range of the instruction format. */
7270 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7271 return (code == CONST_INT
7272 && INTVAL (index) < 1024
7273 && INTVAL (index) > -1024
7274 && (INTVAL (index) & 3) == 0);
7276 if (arm_address_register_rtx_p (index, strict_p)
7277 && (GET_MODE_SIZE (mode) <= 4))
7278 return 1;
7280 if (mode == DImode || mode == DFmode)
7282 if (code == CONST_INT)
7284 HOST_WIDE_INT val = INTVAL (index);
7285 /* ??? Can we assume ldrd for thumb2? */
7286 /* Thumb-2 ldrd only has reg+const addressing modes. */
7287 /* ldrd supports offsets of +-1020.
7288 However the ldr fallback does not. */
7289 return val > -256 && val < 256 && (val & 3) == 0;
7291 else
7292 return 0;
7295 if (code == MULT)
7297 rtx xiop0 = XEXP (index, 0);
7298 rtx xiop1 = XEXP (index, 1);
7300 return ((arm_address_register_rtx_p (xiop0, strict_p)
7301 && thumb2_index_mul_operand (xiop1))
7302 || (arm_address_register_rtx_p (xiop1, strict_p)
7303 && thumb2_index_mul_operand (xiop0)));
7305 else if (code == ASHIFT)
7307 rtx op = XEXP (index, 1);
7309 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7310 && CONST_INT_P (op)
7311 && INTVAL (op) > 0
7312 && INTVAL (op) <= 3);
7315 return (code == CONST_INT
7316 && INTVAL (index) < 4096
7317 && INTVAL (index) > -256);
7320 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7321 static int
7322 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7324 int regno;
7326 if (!REG_P (x))
7327 return 0;
7329 regno = REGNO (x);
7331 if (strict_p)
7332 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7334 return (regno <= LAST_LO_REGNUM
7335 || regno > LAST_VIRTUAL_REGISTER
7336 || regno == FRAME_POINTER_REGNUM
7337 || (GET_MODE_SIZE (mode) >= 4
7338 && (regno == STACK_POINTER_REGNUM
7339 || regno >= FIRST_PSEUDO_REGISTER
7340 || x == hard_frame_pointer_rtx
7341 || x == arg_pointer_rtx)));
7344 /* Return nonzero if x is a legitimate index register. This is the case
7345 for any base register that can access a QImode object. */
7346 inline static int
7347 thumb1_index_register_rtx_p (rtx x, int strict_p)
7349 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7352 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7354 The AP may be eliminated to either the SP or the FP, so we use the
7355 least common denominator, e.g. SImode, and offsets from 0 to 64.
7357 ??? Verify whether the above is the right approach.
7359 ??? Also, the FP may be eliminated to the SP, so perhaps that
7360 needs special handling also.
7362 ??? Look at how the mips16 port solves this problem. It probably uses
7363 better ways to solve some of these problems.
7365 Although it is not incorrect, we don't accept QImode and HImode
7366 addresses based on the frame pointer or arg pointer until the
7367 reload pass starts. This is so that eliminating such addresses
7368 into stack based ones won't produce impossible code. */
7370 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7372 /* ??? Not clear if this is right. Experiment. */
7373 if (GET_MODE_SIZE (mode) < 4
7374 && !(reload_in_progress || reload_completed)
7375 && (reg_mentioned_p (frame_pointer_rtx, x)
7376 || reg_mentioned_p (arg_pointer_rtx, x)
7377 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7378 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7379 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7380 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7381 return 0;
7383 /* Accept any base register. SP only in SImode or larger. */
7384 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7385 return 1;
7387 /* This is PC relative data before arm_reorg runs. */
7388 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7389 && GET_CODE (x) == SYMBOL_REF
7390 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7391 return 1;
7393 /* This is PC relative data after arm_reorg runs. */
7394 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7395 && reload_completed
7396 && (GET_CODE (x) == LABEL_REF
7397 || (GET_CODE (x) == CONST
7398 && GET_CODE (XEXP (x, 0)) == PLUS
7399 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7400 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7401 return 1;
7403 /* Post-inc indexing only supported for SImode and larger. */
7404 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7405 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7406 return 1;
7408 else if (GET_CODE (x) == PLUS)
7410 /* REG+REG address can be any two index registers. */
7411 /* We disallow FRAME+REG addressing since we know that FRAME
7412 will be replaced with STACK, and SP relative addressing only
7413 permits SP+OFFSET. */
7414 if (GET_MODE_SIZE (mode) <= 4
7415 && XEXP (x, 0) != frame_pointer_rtx
7416 && XEXP (x, 1) != frame_pointer_rtx
7417 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7418 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7419 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7420 return 1;
7422 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7423 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7424 || XEXP (x, 0) == arg_pointer_rtx)
7425 && CONST_INT_P (XEXP (x, 1))
7426 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7427 return 1;
7429 /* REG+const has a 10-bit offset for SP, but only SImode and
7430 larger are supported. */
7431 /* ??? Should probably check for DI/DFmode overflow here
7432 just like GO_IF_LEGITIMATE_OFFSET does. */
7433 else if (REG_P (XEXP (x, 0))
7434 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7435 && GET_MODE_SIZE (mode) >= 4
7436 && CONST_INT_P (XEXP (x, 1))
7437 && INTVAL (XEXP (x, 1)) >= 0
7438 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7439 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7440 return 1;
7442 else if (REG_P (XEXP (x, 0))
7443 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7444 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7445 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7446 && REGNO (XEXP (x, 0))
7447 <= LAST_VIRTUAL_POINTER_REGISTER))
7448 && GET_MODE_SIZE (mode) >= 4
7449 && CONST_INT_P (XEXP (x, 1))
7450 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7451 return 1;
7454 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7455 && GET_MODE_SIZE (mode) == 4
7456 && GET_CODE (x) == SYMBOL_REF
7457 && CONSTANT_POOL_ADDRESS_P (x)
7458 && ! (flag_pic
7459 && symbol_mentioned_p (get_pool_constant (x))
7460 && ! pcrel_constant_p (get_pool_constant (x))))
7461 return 1;
7463 return 0;
7466 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7467 instruction of mode MODE. */
7469 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7471 switch (GET_MODE_SIZE (mode))
7473 case 1:
7474 return val >= 0 && val < 32;
7476 case 2:
7477 return val >= 0 && val < 64 && (val & 1) == 0;
7479 default:
7480 return (val >= 0
7481 && (val + GET_MODE_SIZE (mode)) <= 128
7482 && (val & 3) == 0);
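Illustrative aside (not part of arm.c): the ranges above mean byte accesses take offsets 0-31, halfword accesses 0-62 (even), and word or larger accesses must be 4-byte aligned and end by byte 128. A standalone sketch with the mode replaced by an access size in bytes (hypothetical name):

#include <stdbool.h>
#include <stdio.h>

static bool
thumb1_offset_ok (int size, long val)
{
  switch (size)
    {
    case 1:
      return val >= 0 && val < 32;
    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;
    default:                    /* 4 bytes and up.  */
      return val >= 0 && val + size <= 128 && (val & 3) == 0;
    }
}

int
main (void)
{
  printf ("%d %d %d\n",
          thumb1_offset_ok (1, 31),    /* 1: last valid byte offset.  */
          thumb1_offset_ok (2, 63),    /* 0: odd halfword offset.  */
          thumb1_offset_ok (4, 124));  /* 1: 124 + 4 still ends at 128.  */
  return 0;
}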
7486 bool
7487 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7489 if (TARGET_ARM)
7490 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7491 else if (TARGET_THUMB2)
7492 return thumb2_legitimate_address_p (mode, x, strict_p);
7493 else /* if (TARGET_THUMB1) */
7494 return thumb1_legitimate_address_p (mode, x, strict_p);
7497 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7499 Given an rtx X being reloaded into a reg required to be
7500 in class CLASS, return the class of reg to actually use.
7501 In general this is just CLASS, but for the Thumb core registers and
7502 immediate constants we prefer a LO_REGS class or a subset. */
7504 static reg_class_t
7505 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7507 if (TARGET_32BIT)
7508 return rclass;
7509 else
7511 if (rclass == GENERAL_REGS)
7512 return LO_REGS;
7513 else
7514 return rclass;
7518 /* Build the SYMBOL_REF for __tls_get_addr. */
7520 static GTY(()) rtx tls_get_addr_libfunc;
7522 static rtx
7523 get_tls_get_addr (void)
7525 if (!tls_get_addr_libfunc)
7526 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7527 return tls_get_addr_libfunc;
7531 arm_load_tp (rtx target)
7533 if (!target)
7534 target = gen_reg_rtx (SImode);
7536 if (TARGET_HARD_TP)
7538 /* Can return in any reg. */
7539 emit_insn (gen_load_tp_hard (target));
7541 else
7543 /* Always returned in r0. Immediately copy the result into a pseudo,
7544 otherwise other uses of r0 (e.g. setting up function arguments) may
7545 clobber the value. */
7547 rtx tmp;
7549 emit_insn (gen_load_tp_soft ());
7551 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7552 emit_move_insn (target, tmp);
7554 return target;
7557 static rtx
7558 load_tls_operand (rtx x, rtx reg)
7560 rtx tmp;
7562 if (reg == NULL_RTX)
7563 reg = gen_reg_rtx (SImode);
7565 tmp = gen_rtx_CONST (SImode, x);
7567 emit_move_insn (reg, tmp);
7569 return reg;
7572 static rtx
7573 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7575 rtx insns, label, labelno, sum;
7577 gcc_assert (reloc != TLS_DESCSEQ);
7578 start_sequence ();
7580 labelno = GEN_INT (pic_labelno++);
7581 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7582 label = gen_rtx_CONST (VOIDmode, label);
7584 sum = gen_rtx_UNSPEC (Pmode,
7585 gen_rtvec (4, x, GEN_INT (reloc), label,
7586 GEN_INT (TARGET_ARM ? 8 : 4)),
7587 UNSPEC_TLS);
7588 reg = load_tls_operand (sum, reg);
7590 if (TARGET_ARM)
7591 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7592 else
7593 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7595 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7596 LCT_PURE, /* LCT_CONST? */
7597 Pmode, 1, reg, Pmode);
7599 insns = get_insns ();
7600 end_sequence ();
7602 return insns;
7605 static rtx
7606 arm_tls_descseq_addr (rtx x, rtx reg)
7608 rtx labelno = GEN_INT (pic_labelno++);
7609 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7610 rtx sum = gen_rtx_UNSPEC (Pmode,
7611 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7612 gen_rtx_CONST (VOIDmode, label),
7613 GEN_INT (!TARGET_ARM)),
7614 UNSPEC_TLS);
7615 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7617 emit_insn (gen_tlscall (x, labelno));
7618 if (!reg)
7619 reg = gen_reg_rtx (SImode);
7620 else
7621 gcc_assert (REGNO (reg) != R0_REGNUM);
7623 emit_move_insn (reg, reg0);
7625 return reg;
7629 legitimize_tls_address (rtx x, rtx reg)
7631 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7632 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7634 switch (model)
7636 case TLS_MODEL_GLOBAL_DYNAMIC:
7637 if (TARGET_GNU2_TLS)
7639 reg = arm_tls_descseq_addr (x, reg);
7641 tp = arm_load_tp (NULL_RTX);
7643 dest = gen_rtx_PLUS (Pmode, tp, reg);
7645 else
7647 /* Original scheme */
7648 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7649 dest = gen_reg_rtx (Pmode);
7650 emit_libcall_block (insns, dest, ret, x);
7652 return dest;
7654 case TLS_MODEL_LOCAL_DYNAMIC:
7655 if (TARGET_GNU2_TLS)
7657 reg = arm_tls_descseq_addr (x, reg);
7659 tp = arm_load_tp (NULL_RTX);
7661 dest = gen_rtx_PLUS (Pmode, tp, reg);
7663 else
7665 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7667 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7668 share the LDM result with other LD model accesses. */
7669 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7670 UNSPEC_TLS);
7671 dest = gen_reg_rtx (Pmode);
7672 emit_libcall_block (insns, dest, ret, eqv);
7674 /* Load the addend. */
7675 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7676 GEN_INT (TLS_LDO32)),
7677 UNSPEC_TLS);
7678 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7679 dest = gen_rtx_PLUS (Pmode, dest, addend);
7681 return dest;
7683 case TLS_MODEL_INITIAL_EXEC:
7684 labelno = GEN_INT (pic_labelno++);
7685 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7686 label = gen_rtx_CONST (VOIDmode, label);
7687 sum = gen_rtx_UNSPEC (Pmode,
7688 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7689 GEN_INT (TARGET_ARM ? 8 : 4)),
7690 UNSPEC_TLS);
7691 reg = load_tls_operand (sum, reg);
7693 if (TARGET_ARM)
7694 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7695 else if (TARGET_THUMB2)
7696 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7697 else
7699 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7700 emit_move_insn (reg, gen_const_mem (SImode, reg));
7703 tp = arm_load_tp (NULL_RTX);
7705 return gen_rtx_PLUS (Pmode, tp, reg);
7707 case TLS_MODEL_LOCAL_EXEC:
7708 tp = arm_load_tp (NULL_RTX);
7710 reg = gen_rtx_UNSPEC (Pmode,
7711 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7712 UNSPEC_TLS);
7713 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7715 return gen_rtx_PLUS (Pmode, tp, reg);
7717 default:
7718 abort ();
7722 /* Try machine-dependent ways of modifying an illegitimate address
7723 to be legitimate. If we find one, return the new, valid address. */
7725 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7727 if (arm_tls_referenced_p (x))
7729 rtx addend = NULL;
7731 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7733 addend = XEXP (XEXP (x, 0), 1);
7734 x = XEXP (XEXP (x, 0), 0);
7737 if (GET_CODE (x) != SYMBOL_REF)
7738 return x;
7740 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7742 x = legitimize_tls_address (x, NULL_RTX);
7744 if (addend)
7746 x = gen_rtx_PLUS (SImode, x, addend);
7747 orig_x = x;
7749 else
7750 return x;
7753 if (!TARGET_ARM)
7755 /* TODO: legitimize_address for Thumb2. */
7756 if (TARGET_THUMB2)
7757 return x;
7758 return thumb_legitimize_address (x, orig_x, mode);
7761 if (GET_CODE (x) == PLUS)
7763 rtx xop0 = XEXP (x, 0);
7764 rtx xop1 = XEXP (x, 1);
7766 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7767 xop0 = force_reg (SImode, xop0);
7769 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7770 && !symbol_mentioned_p (xop1))
7771 xop1 = force_reg (SImode, xop1);
7773 if (ARM_BASE_REGISTER_RTX_P (xop0)
7774 && CONST_INT_P (xop1))
7776 HOST_WIDE_INT n, low_n;
7777 rtx base_reg, val;
7778 n = INTVAL (xop1);
7780 /* VFP addressing modes actually allow greater offsets, but for
7781 now we just stick with the lowest common denominator. */
7782 if (mode == DImode
7783 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7785 low_n = n & 0x0f;
7786 n &= ~0x0f;
7787 if (low_n > 4)
7789 n += 16;
7790 low_n -= 16;
7793 else
7795 low_n = ((mode) == TImode ? 0
7796 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7797 n -= low_n;
7800 base_reg = gen_reg_rtx (SImode);
7801 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7802 emit_move_insn (base_reg, val);
7803 x = plus_constant (Pmode, base_reg, low_n);
7805 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7806 x = gen_rtx_PLUS (SImode, xop0, xop1);
7809 /* XXX We don't allow MINUS any more -- see comment in
7810 arm_legitimate_address_outer_p (). */
7811 else if (GET_CODE (x) == MINUS)
7813 rtx xop0 = XEXP (x, 0);
7814 rtx xop1 = XEXP (x, 1);
7816 if (CONSTANT_P (xop0))
7817 xop0 = force_reg (SImode, xop0);
7819 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7820 xop1 = force_reg (SImode, xop1);
7822 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7823 x = gen_rtx_MINUS (SImode, xop0, xop1);
7826 /* Make sure to take full advantage of the pre-indexed addressing mode
7827 with absolute addresses, which often allows the base register to
7828 be factorized for multiple adjacent memory references, and it might
7829 even allow the minipool to be avoided entirely. */
7830 else if (CONST_INT_P (x) && optimize > 0)
7832 unsigned int bits;
7833 HOST_WIDE_INT mask, base, index;
7834 rtx base_reg;
7836 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7837 use an 8-bit index. So let's use a 12-bit index for SImode only and
7838 hope that arm_gen_constant will enable ldrb to use more bits. */
7839 bits = (mode == SImode) ? 12 : 8;
7840 mask = (1 << bits) - 1;
7841 base = INTVAL (x) & ~mask;
7842 index = INTVAL (x) & mask;
7843 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7845 /* It'll most probably be more efficient to generate the base
7846 with more bits set and use a negative index instead. */
7847 base |= mask;
7848 index -= mask;
7850 base_reg = force_reg (SImode, GEN_INT (base));
7851 x = plus_constant (Pmode, base_reg, index);
7854 if (flag_pic)
7856 /* We need to find and carefully transform any SYMBOL and LABEL
7857 references; so go back to the original address expression. */
7858 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7860 if (new_x != orig_x)
7861 x = new_x;
7864 return x;
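Illustrative aside (not part of arm.c): the CONST_INT branch of arm_legitimize_address above splits an absolute address into a base (which neighbouring accesses can hopefully share) and a small index, switching to base|mask plus a negative index when the plain base would need too many set bits to materialise. A standalone sketch of that split, with hypothetical names and a local popcount standing in for bit_count:

#include <stdio.h>

static unsigned
popcount32 (unsigned long v)
{
  unsigned n = 0;
  for (v &= 0xffffffffUL; v; v &= v - 1)
    n++;
  return n;
}

/* Split ADDR into *BASE + *INDEX, keeping BITS bits in the index
   (12 for SImode ldr/str, 8 otherwise, as in the code above).  */
static void
split_absolute (long addr, unsigned bits, long *base, long *index)
{
  long mask = (1L << bits) - 1;
  *base = addr & ~mask;
  *index = addr & mask;
  if (popcount32 (*base) > (32 - bits) / 2)
    {
      /* A dense base is cheaper to build as base|mask with a
         negative index; the sum stays the same.  */
      *base |= mask;
      *index -= mask;
    }
}

int
main (void)
{
  long base, index;
  split_absolute (0x12345678, 12, &base, &index);
  printf ("%#lx = %#lx %+ld\n", 0x12345678UL, (unsigned long) base, index);
  split_absolute (0x7ffff678, 12, &base, &index);   /* Triggers the negative-index path.  */
  printf ("%#lx = %#lx %+ld\n", 0x7ffff678UL, (unsigned long) base, index);
  return 0;
}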
7868 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7869 to be legitimate. If we find one, return the new, valid address. */
7871 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7873 if (GET_CODE (x) == PLUS
7874 && CONST_INT_P (XEXP (x, 1))
7875 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7876 || INTVAL (XEXP (x, 1)) < 0))
7878 rtx xop0 = XEXP (x, 0);
7879 rtx xop1 = XEXP (x, 1);
7880 HOST_WIDE_INT offset = INTVAL (xop1);
7882 /* Try to fold the offset into a bias of the base register and
7883 then offset from that. Don't do this when optimizing for space
7884 since it can cause too many CSEs. */
7885 if (optimize_size && offset >= 0
7886 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7888 HOST_WIDE_INT delta;
7890 if (offset >= 256)
7891 delta = offset - (256 - GET_MODE_SIZE (mode));
7892 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7893 delta = 31 * GET_MODE_SIZE (mode);
7894 else
7895 delta = offset & (~31 * GET_MODE_SIZE (mode));
7897 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7898 NULL_RTX);
7899 x = plus_constant (Pmode, xop0, delta);
7901 else if (offset < 0 && offset > -256)
7902 /* Small negative offsets are best done with a subtract before the
7903 dereference; forcing these into a register normally takes two
7904 instructions. */
7905 x = force_operand (x, NULL_RTX);
7906 else
7908 /* For the remaining cases, force the constant into a register. */
7909 xop1 = force_reg (SImode, xop1);
7910 x = gen_rtx_PLUS (SImode, xop0, xop1);
7913 else if (GET_CODE (x) == PLUS
7914 && s_register_operand (XEXP (x, 1), SImode)
7915 && !s_register_operand (XEXP (x, 0), SImode))
7917 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7919 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7922 if (flag_pic)
7924 /* We need to find and carefully transform any SYMBOL and LABEL
7925 references; so go back to the original address expression. */
7926 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7928 if (new_x != orig_x)
7929 x = new_x;
7932 return x;
7935 bool
7936 arm_legitimize_reload_address (rtx *p,
7937 machine_mode mode,
7938 int opnum, int type,
7939 int ind_levels ATTRIBUTE_UNUSED)
7941 /* We must recognize output that we have already generated ourselves. */
7942 if (GET_CODE (*p) == PLUS
7943 && GET_CODE (XEXP (*p, 0)) == PLUS
7944 && REG_P (XEXP (XEXP (*p, 0), 0))
7945 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7946 && CONST_INT_P (XEXP (*p, 1)))
7948 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7949 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7950 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7951 return true;
7954 if (GET_CODE (*p) == PLUS
7955 && REG_P (XEXP (*p, 0))
7956 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7957 /* If the base register is equivalent to a constant, let the generic
7958 code handle it. Otherwise we will run into problems if a future
7959 reload pass decides to rematerialize the constant. */
7960 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7961 && CONST_INT_P (XEXP (*p, 1)))
7963 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7964 HOST_WIDE_INT low, high;
7966 /* Detect coprocessor load/stores. */
7967 bool coproc_p = ((TARGET_HARD_FLOAT
7968 && TARGET_VFP
7969 && (mode == SFmode || mode == DFmode))
7970 || (TARGET_REALLY_IWMMXT
7971 && VALID_IWMMXT_REG_MODE (mode))
7972 || (TARGET_NEON
7973 && (VALID_NEON_DREG_MODE (mode)
7974 || VALID_NEON_QREG_MODE (mode))));
7976 /* For some cases, bail out when the lower two bits are nonzero (unaligned). */
7977 if ((val & 0x3) != 0
7978 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7979 && (coproc_p
7980 /* For DI, and DF under soft-float: */
7981 || ((mode == DImode || mode == DFmode)
7982 /* Without ldrd, we use stm/ldm, which does not
7983 fare well with unaligned bits. */
7984 && (! TARGET_LDRD
7985 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7986 || TARGET_THUMB2))))
7987 return false;
7989 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7990 where the (reg+high) part gets turned into a reload add insn,
7991 we try to decompose the index into high/low values that can often
7992 also lead to better reload CSE.
7993 For example:
7994 ldr r0, [r2, #4100] // Offset too large
7995 ldr r1, [r2, #4104] // Offset too large
7997 is best reloaded as:
7998 add t1, r2, #4096
7999 ldr r0, [t1, #4]
8000 add t2, r2, #4096
8001 ldr r1, [t2, #8]
8003 which post-reload CSE can simplify in most cases to eliminate the
8004 second add instruction:
8005 add t1, r2, #4096
8006 ldr r0, [t1, #4]
8007 ldr r1, [t1, #8]
8009 The idea here is that we want to split out the bits of the constant
8010 as a mask, rather than as subtracting the maximum offset that the
8011 respective type of load/store used can handle.
8013 We can still make use of a negative low part even if
8014 the overall offset is positive; sometimes this may lead to an immediate
8015 that can be constructed with fewer instructions.
8016 For example:
8017 ldr r0, [r2, #0x3FFFFC]
8019 This is best reloaded as:
8020 add t1, r2, #0x400000
8021 ldr r0, [t1, #-4]
8023 The trick for spotting this for a load insn with N bits of offset
8024 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
8025 negative offset that is going to make bit N and all the bits below
8026 it become zero in the remainder part.
8028 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
8029 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
8030 used in most cases of ARM load/store instructions. */
8032 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
8033 (((VAL) & ((1 << (N)) - 1)) \
8034 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
8035 : 0)
8037 if (coproc_p)
8039 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
8041 /* NEON quad-word load/stores are made of two double-word accesses,
8042 so the valid index range is reduced by 8. Treat it as a 9-bit range if
8043 we go over it. */
8044 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
8045 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
8047 else if (GET_MODE_SIZE (mode) == 8)
8049 if (TARGET_LDRD)
8050 low = (TARGET_THUMB2
8051 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8052 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8053 else
8054 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8055 to access doublewords. The supported load/store offsets are
8056 -8, -4, and 4, which we try to produce here. */
8057 low = ((val & 0xf) ^ 0x8) - 0x8;
8059 else if (GET_MODE_SIZE (mode) < 8)
8061 /* NEON element load/stores do not have an offset. */
8062 if (TARGET_NEON_FP16 && mode == HFmode)
8063 return false;
8065 if (TARGET_THUMB2)
8067 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8068 Try the wider 12-bit range first, and re-try if the result
8069 is out of range. */
8070 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8071 if (low < -255)
8072 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8074 else
8076 if (mode == HImode || mode == HFmode)
8078 if (arm_arch4)
8079 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8080 else
8082 /* The storehi/movhi_bytes fallbacks can use only
8083 [-4094,+4094] of the full ldrb/strb index range. */
8084 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8085 if (low == 4095 || low == -4095)
8086 return false;
8089 else
8090 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8093 else
8094 return false;
8096 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8097 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8098 - (unsigned HOST_WIDE_INT) 0x80000000);
8099 /* Check for overflow or zero */
8100 if (low == 0 || high == 0 || (high + low != val))
8101 return false;
8103 /* Reload the high part into a base reg; leave the low part
8104 in the mem.
8105 Note that replacing this gen_rtx_PLUS with plus_constant is
8106 wrong in this case because we rely on the
8107 (plus (plus reg c1) c2) structure being preserved so that
8108 XEXP (*p, 0) in push_reload below uses the correct term. */
8109 *p = gen_rtx_PLUS (GET_MODE (*p),
8110 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8111 GEN_INT (high)),
8112 GEN_INT (low));
8113 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8114 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8115 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8116 return true;
8119 return false;
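Illustrative aside (not part of arm.c): the SIGN_MAG_LOW_ADDR_BITS macro above can be exercised on the offsets from the examples in the comment. A minimal standalone sketch; the high part here is simply val - low, whereas the code above additionally sign-extends it through 32 bits:

#include <stdio.h>

#define SIGN_MAG_LOW_ADDR_BITS(VAL, N)                                  \
  (((VAL) & ((1 << (N)) - 1))                                           \
   ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N))       \
   : 0)

int
main (void)
{
  long vals[] = { 4100, 4104, 0x3FFFFC };
  for (int i = 0; i < 3; i++)
    {
      long low = SIGN_MAG_LOW_ADDR_BITS (vals[i], 12);  /* 12-bit ldr/str index.  */
      long high = vals[i] - low;
      /* 4100 and 4104 end up sharing the same high part (8192), which is what
         lets post-reload CSE drop the second add; 0x3FFFFC becomes 0x400000 - 4.  */
      printf ("%#lx = %#lx %+ld\n", (unsigned long) vals[i],
              (unsigned long) high, low);
    }
  return 0;
}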
8123 thumb_legitimize_reload_address (rtx *x_p,
8124 machine_mode mode,
8125 int opnum, int type,
8126 int ind_levels ATTRIBUTE_UNUSED)
8128 rtx x = *x_p;
8130 if (GET_CODE (x) == PLUS
8131 && GET_MODE_SIZE (mode) < 4
8132 && REG_P (XEXP (x, 0))
8133 && XEXP (x, 0) == stack_pointer_rtx
8134 && CONST_INT_P (XEXP (x, 1))
8135 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8137 rtx orig_x = x;
8139 x = copy_rtx (x);
8140 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8141 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8142 return x;
8145 /* If both registers are hi-regs, then it's better to reload the
8146 entire expression rather than each register individually. That
8147 only requires one reload register rather than two. */
8148 if (GET_CODE (x) == PLUS
8149 && REG_P (XEXP (x, 0))
8150 && REG_P (XEXP (x, 1))
8151 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8152 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8154 rtx orig_x = x;
8156 x = copy_rtx (x);
8157 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8158 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8159 return x;
8162 return NULL;
8165 /* Return TRUE if X contains any TLS symbol references. */
8167 bool
8168 arm_tls_referenced_p (rtx x)
8170 if (! TARGET_HAVE_TLS)
8171 return false;
8173 subrtx_iterator::array_type array;
8174 FOR_EACH_SUBRTX (iter, array, x, ALL)
8176 const_rtx x = *iter;
8177 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8178 return true;
8180 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8181 TLS offsets, not real symbol references. */
8182 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8183 iter.skip_subrtxes ();
8185 return false;
8188 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8190 On the ARM, allow any integer (invalid ones are removed later by insn
8191 patterns), nice doubles and symbol_refs that refer to the function's
8192 constant pool XXX.
8194 When generating PIC, allow anything. */
8196 static bool
8197 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8199 /* At present, we have no support for Neon structure constants, so forbid
8200 them here. It might be possible to handle simple cases like 0 and -1
8201 in future. */
8202 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8203 return false;
8205 return flag_pic || !label_mentioned_p (x);
8208 static bool
8209 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8211 return (CONST_INT_P (x)
8212 || CONST_DOUBLE_P (x)
8213 || CONSTANT_ADDRESS_P (x)
8214 || flag_pic);
8217 static bool
8218 arm_legitimate_constant_p (machine_mode mode, rtx x)
8220 return (!arm_cannot_force_const_mem (mode, x)
8221 && (TARGET_32BIT
8222 ? arm_legitimate_constant_p_1 (mode, x)
8223 : thumb_legitimate_constant_p (mode, x)));
8226 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8228 static bool
8229 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8231 rtx base, offset;
8233 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8235 split_const (x, &base, &offset);
8236 if (GET_CODE (base) == SYMBOL_REF
8237 && !offset_within_block_p (base, INTVAL (offset)))
8238 return true;
8240 return arm_tls_referenced_p (x);
8243 #define REG_OR_SUBREG_REG(X) \
8244 (REG_P (X) \
8245 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8247 #define REG_OR_SUBREG_RTX(X) \
8248 (REG_P (X) ? (X) : SUBREG_REG (X))
8250 static inline int
8251 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8253 machine_mode mode = GET_MODE (x);
8254 int total, words;
8256 switch (code)
8258 case ASHIFT:
8259 case ASHIFTRT:
8260 case LSHIFTRT:
8261 case ROTATERT:
8262 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8264 case PLUS:
8265 case MINUS:
8266 case COMPARE:
8267 case NEG:
8268 case NOT:
8269 return COSTS_N_INSNS (1);
8271 case MULT:
8272 if (CONST_INT_P (XEXP (x, 1)))
8274 int cycles = 0;
8275 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8277 while (i)
8279 i >>= 2;
8280 cycles++;
8282 return COSTS_N_INSNS (2) + cycles;
8284 return COSTS_N_INSNS (1) + 16;
8286 case SET:
8287 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8288 the mode. */
8289 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8290 return (COSTS_N_INSNS (words)
8291 + 4 * ((MEM_P (SET_SRC (x)))
8292 + MEM_P (SET_DEST (x))));
8294 case CONST_INT:
8295 if (outer == SET)
8297 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8298 return 0;
8299 if (thumb_shiftable_const (INTVAL (x)))
8300 return COSTS_N_INSNS (2);
8301 return COSTS_N_INSNS (3);
8303 else if ((outer == PLUS || outer == COMPARE)
8304 && INTVAL (x) < 256 && INTVAL (x) > -256)
8305 return 0;
8306 else if ((outer == IOR || outer == XOR || outer == AND)
8307 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8308 return COSTS_N_INSNS (1);
8309 else if (outer == AND)
8311 int i;
8312 /* This duplicates the tests in the andsi3 expander. */
8313 for (i = 9; i <= 31; i++)
8314 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8315 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8316 return COSTS_N_INSNS (2);
8318 else if (outer == ASHIFT || outer == ASHIFTRT
8319 || outer == LSHIFTRT)
8320 return 0;
8321 return COSTS_N_INSNS (2);
8323 case CONST:
8324 case CONST_DOUBLE:
8325 case LABEL_REF:
8326 case SYMBOL_REF:
8327 return COSTS_N_INSNS (3);
8329 case UDIV:
8330 case UMOD:
8331 case DIV:
8332 case MOD:
8333 return 100;
8335 case TRUNCATE:
8336 return 99;
8338 case AND:
8339 case XOR:
8340 case IOR:
8341 /* XXX guess. */
8342 return 8;
8344 case MEM:
8345 /* XXX another guess. */
8346 /* Memory costs quite a lot for the first word, but subsequent words
8347 load at the equivalent of a single insn each. */
8348 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8349 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8350 ? 4 : 0));
8352 case IF_THEN_ELSE:
8353 /* XXX a guess. */
8354 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8355 return 14;
8356 return 2;
8358 case SIGN_EXTEND:
8359 case ZERO_EXTEND:
8360 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8361 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8363 if (mode == SImode)
8364 return total;
8366 if (arm_arch6)
8367 return total + COSTS_N_INSNS (1);
8369 /* Assume a two-shift sequence. Increase the cost slightly so
8370 we prefer actual shifts over an extend operation. */
8371 return total + 1 + COSTS_N_INSNS (2);
8373 default:
8374 return 99;
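Illustrative aside (not part of arm.c): the MULT case above charges one extra cycle per two bits of the constant multiplier, presumably reflecting early-terminating multipliers that retire roughly two bits of the operand per cycle. A standalone sketch of that estimate (hypothetical name):

#include <stdio.h>

static int
mul_const_cycles (unsigned long i)
{
  int cycles = 0;
  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return cycles;
}

int
main (void)
{
  /* An 8-bit constant multiplier costs 4 extra cycles, a 16-bit one costs 8.  */
  printf ("%d %d\n", mul_const_cycles (255), mul_const_cycles (65535));
  return 0;
}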
8378 static inline bool
8379 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8381 machine_mode mode = GET_MODE (x);
8382 enum rtx_code subcode;
8383 rtx operand;
8384 enum rtx_code code = GET_CODE (x);
8385 *total = 0;
8387 switch (code)
8389 case MEM:
8390 /* Memory costs quite a lot for the first word, but subsequent words
8391 load at the equivalent of a single insn each. */
8392 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8393 return true;
8395 case DIV:
8396 case MOD:
8397 case UDIV:
8398 case UMOD:
8399 if (TARGET_HARD_FLOAT && mode == SFmode)
8400 *total = COSTS_N_INSNS (2);
8401 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8402 *total = COSTS_N_INSNS (4);
8403 else
8404 *total = COSTS_N_INSNS (20);
8405 return false;
8407 case ROTATE:
8408 if (REG_P (XEXP (x, 1)))
8409 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8410 else if (!CONST_INT_P (XEXP (x, 1)))
8411 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8413 /* Fall through */
8414 case ROTATERT:
8415 if (mode != SImode)
8417 *total += COSTS_N_INSNS (4);
8418 return true;
8421 /* Fall through */
8422 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8423 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8424 if (mode == DImode)
8426 *total += COSTS_N_INSNS (3);
8427 return true;
8430 *total += COSTS_N_INSNS (1);
8431 /* Increase the cost of complex shifts because they aren't any faster,
8432 and they reduce dual-issue opportunities. */
8433 if (arm_tune_cortex_a9
8434 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8435 ++*total;
8437 return true;
8439 case MINUS:
8440 if (mode == DImode)
8442 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8443 if (CONST_INT_P (XEXP (x, 0))
8444 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8446 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8447 return true;
8450 if (CONST_INT_P (XEXP (x, 1))
8451 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8453 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8454 return true;
8457 return false;
8460 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8462 if (TARGET_HARD_FLOAT
8463 && (mode == SFmode
8464 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8466 *total = COSTS_N_INSNS (1);
8467 if (CONST_DOUBLE_P (XEXP (x, 0))
8468 && arm_const_double_rtx (XEXP (x, 0)))
8470 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8471 return true;
8474 if (CONST_DOUBLE_P (XEXP (x, 1))
8475 && arm_const_double_rtx (XEXP (x, 1)))
8477 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8478 return true;
8481 return false;
8483 *total = COSTS_N_INSNS (20);
8484 return false;
8487 *total = COSTS_N_INSNS (1);
8488 if (CONST_INT_P (XEXP (x, 0))
8489 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8491 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8492 return true;
8495 subcode = GET_CODE (XEXP (x, 1));
8496 if (subcode == ASHIFT || subcode == ASHIFTRT
8497 || subcode == LSHIFTRT
8498 || subcode == ROTATE || subcode == ROTATERT)
8500 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8501 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8502 return true;
8505 /* A shift as a part of RSB costs no more than RSB itself. */
8506 if (GET_CODE (XEXP (x, 0)) == MULT
8507 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8509 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8510 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8511 return true;
8514 if (subcode == MULT
8515 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8517 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8518 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8519 return true;
8522 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8523 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8525 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8526 if (REG_P (XEXP (XEXP (x, 1), 0))
8527 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8528 *total += COSTS_N_INSNS (1);
8530 return true;
8533 /* Fall through */
8535 case PLUS:
8536 if (code == PLUS && arm_arch6 && mode == SImode
8537 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8538 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8540 *total = COSTS_N_INSNS (1);
8541 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8542 0, speed);
8543 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8544 return true;
8547 /* MLA: All arguments must be registers. We filter out
8548 multiplication by a power of two, so that we fall through to
8549 the code below. */
8550 if (GET_CODE (XEXP (x, 0)) == MULT
8551 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8553 /* The cost comes from the cost of the multiply. */
8554 return false;
8557 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8559 if (TARGET_HARD_FLOAT
8560 && (mode == SFmode
8561 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8563 *total = COSTS_N_INSNS (1);
8564 if (CONST_DOUBLE_P (XEXP (x, 1))
8565 && arm_const_double_rtx (XEXP (x, 1)))
8567 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8568 return true;
8571 return false;
8574 *total = COSTS_N_INSNS (20);
8575 return false;
8578 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8579 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8581 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8582 if (REG_P (XEXP (XEXP (x, 0), 0))
8583 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8584 *total += COSTS_N_INSNS (1);
8585 return true;
8588 /* Fall through */
8590 case AND: case XOR: case IOR:
8592 /* Normally the frame registers will be split into reg+const during
8593 reload, so it is a bad idea to combine them with other instructions,
8594 since then they might not be moved outside of loops. As a compromise
8595 we allow integration with ops that have a constant as their second
8596 operand. */
8597 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8598 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8599 && !CONST_INT_P (XEXP (x, 1)))
8600 *total = COSTS_N_INSNS (1);
8602 if (mode == DImode)
8604 *total += COSTS_N_INSNS (2);
8605 if (CONST_INT_P (XEXP (x, 1))
8606 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8608 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8609 return true;
8612 return false;
8615 *total += COSTS_N_INSNS (1);
8616 if (CONST_INT_P (XEXP (x, 1))
8617 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8619 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8620 return true;
8622 subcode = GET_CODE (XEXP (x, 0));
8623 if (subcode == ASHIFT || subcode == ASHIFTRT
8624 || subcode == LSHIFTRT
8625 || subcode == ROTATE || subcode == ROTATERT)
8627 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8628 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8629 return true;
8632 if (subcode == MULT
8633 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8635 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8636 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8637 return true;
8640 if (subcode == UMIN || subcode == UMAX
8641 || subcode == SMIN || subcode == SMAX)
8643 *total = COSTS_N_INSNS (3);
8644 return true;
8647 return false;
8649 case MULT:
8650 /* This should have been handled by the CPU specific routines. */
8651 gcc_unreachable ();
8653 case TRUNCATE:
8654 if (arm_arch3m && mode == SImode
8655 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8656 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8657 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8658 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8659 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8660 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8662 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8663 return true;
8665 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8666 return false;
8668 case NEG:
8669 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8671 if (TARGET_HARD_FLOAT
8672 && (mode == SFmode
8673 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8675 *total = COSTS_N_INSNS (1);
8676 return false;
8678 *total = COSTS_N_INSNS (2);
8679 return false;
8682 /* Fall through */
8683 case NOT:
8684 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8685 if (mode == SImode && code == NOT)
8687 subcode = GET_CODE (XEXP (x, 0));
8688 if (subcode == ASHIFT || subcode == ASHIFTRT
8689 || subcode == LSHIFTRT
8690 || subcode == ROTATE || subcode == ROTATERT
8691 || (subcode == MULT
8692 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8694 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8695 /* Register shifts cost an extra cycle. */
8696 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8697 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8698 subcode, 1, speed);
8699 return true;
8703 return false;
8705 case IF_THEN_ELSE:
8706 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8708 *total = COSTS_N_INSNS (4);
8709 return true;
8712 operand = XEXP (x, 0);
8714 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8715 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8716 && REG_P (XEXP (operand, 0))
8717 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8718 *total += COSTS_N_INSNS (1);
8719 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8720 + rtx_cost (XEXP (x, 2), code, 2, speed));
8721 return true;
8723 case NE:
8724 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8726 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8727 return true;
8729 goto scc_insn;
8731 case GE:
8732 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8733 && mode == SImode && XEXP (x, 1) == const0_rtx)
8735 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8736 return true;
8738 goto scc_insn;
8740 case LT:
8741 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8742 && mode == SImode && XEXP (x, 1) == const0_rtx)
8744 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8745 return true;
8747 goto scc_insn;
8749 case EQ:
8750 case GT:
8751 case LE:
8752 case GEU:
8753 case LTU:
8754 case GTU:
8755 case LEU:
8756 case UNORDERED:
8757 case ORDERED:
8758 case UNEQ:
8759 case UNGE:
8760 case UNLT:
8761 case UNGT:
8762 case UNLE:
8763 scc_insn:
8764 /* SCC insns. If the comparison has already been
8765 performed, they cost 2 instructions. Otherwise they need
8766 an additional comparison before them. */
8767 *total = COSTS_N_INSNS (2);
8768 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8770 return true;
8773 /* Fall through */
8774 case COMPARE:
8775 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8777 *total = 0;
8778 return true;
8781 *total += COSTS_N_INSNS (1);
8782 if (CONST_INT_P (XEXP (x, 1))
8783 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8785 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8786 return true;
8789 subcode = GET_CODE (XEXP (x, 0));
8790 if (subcode == ASHIFT || subcode == ASHIFTRT
8791 || subcode == LSHIFTRT
8792 || subcode == ROTATE || subcode == ROTATERT)
8794 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8795 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8796 return true;
8799 if (subcode == MULT
8800 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8802 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8803 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8804 return true;
8807 return false;
8809 case UMIN:
8810 case UMAX:
8811 case SMIN:
8812 case SMAX:
8813 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8814 if (!CONST_INT_P (XEXP (x, 1))
8815 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8816 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8817 return true;
8819 case ABS:
8820 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8822 if (TARGET_HARD_FLOAT
8823 && (mode == SFmode
8824 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8826 *total = COSTS_N_INSNS (1);
8827 return false;
8829 *total = COSTS_N_INSNS (20);
8830 return false;
8832 *total = COSTS_N_INSNS (1);
8833 if (mode == DImode)
8834 *total += COSTS_N_INSNS (3);
8835 return false;
8837 case SIGN_EXTEND:
8838 case ZERO_EXTEND:
8839 *total = 0;
8840 if (GET_MODE_CLASS (mode) == MODE_INT)
8842 rtx op = XEXP (x, 0);
8843 machine_mode opmode = GET_MODE (op);
8845 if (mode == DImode)
8846 *total += COSTS_N_INSNS (1);
8848 if (opmode != SImode)
8850 if (MEM_P (op))
8852 /* If !arm_arch4, we use one of the extendhisi2_mem
8853 or movhi_bytes patterns for HImode. For a QImode
8854 sign extension, we first zero-extend from memory
8855 and then perform a shift sequence. */
8856 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8857 *total += COSTS_N_INSNS (2);
8859 else if (arm_arch6)
8860 *total += COSTS_N_INSNS (1);
8862 /* We don't have the necessary insn, so we need to perform some
8863 other operation. */
8864 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8865 /* An and with constant 255. */
8866 *total += COSTS_N_INSNS (1);
8867 else
8868 /* A shift sequence. Increase costs slightly to avoid
8869 combining two shifts into an extend operation. */
8870 *total += COSTS_N_INSNS (2) + 1;
8873 return false;
8876 switch (GET_MODE (XEXP (x, 0)))
8878 case V8QImode:
8879 case V4HImode:
8880 case V2SImode:
8881 case V4QImode:
8882 case V2HImode:
8883 *total = COSTS_N_INSNS (1);
8884 return false;
8886 default:
8887 gcc_unreachable ();
8889 gcc_unreachable ();
8891 case ZERO_EXTRACT:
8892 case SIGN_EXTRACT:
8893 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8894 return true;
8896 case CONST_INT:
8897 if (const_ok_for_arm (INTVAL (x))
8898 || const_ok_for_arm (~INTVAL (x)))
8899 *total = COSTS_N_INSNS (1);
8900 else
8901 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8902 INTVAL (x), NULL_RTX,
8903 NULL_RTX, 0, 0));
8904 return true;
8906 case CONST:
8907 case LABEL_REF:
8908 case SYMBOL_REF:
8909 *total = COSTS_N_INSNS (3);
8910 return true;
8912 case HIGH:
8913 *total = COSTS_N_INSNS (1);
8914 return true;
8916 case LO_SUM:
8917 *total = COSTS_N_INSNS (1);
8918 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8919 return true;
8921 case CONST_DOUBLE:
8922 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8923 && (mode == SFmode || !TARGET_VFP_SINGLE))
8924 *total = COSTS_N_INSNS (1);
8925 else
8926 *total = COSTS_N_INSNS (4);
8927 return true;
8929 case SET:
8930 /* The vec_extract patterns accept memory operands that require an
8931 address reload. Account for the cost of that reload to give the
8932 auto-inc-dec pass an incentive to try to replace them. */
8933 if (TARGET_NEON && MEM_P (SET_DEST (x))
8934 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8936 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8937 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8938 *total += COSTS_N_INSNS (1);
8939 return true;
8941 /* Likewise for the vec_set patterns. */
8942 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8943 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8944 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8946 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8947 *total = rtx_cost (mem, code, 0, speed);
8948 if (!neon_vector_mem_operand (mem, 2, true))
8949 *total += COSTS_N_INSNS (1);
8950 return true;
8952 return false;
8954 case UNSPEC:
8955 /* We cost this the same as our memory costs so that it can
8956 be hoisted out of loops. */
8957 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8959 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8961 return true;
8963 case CONST_VECTOR:
8964 if (TARGET_NEON
8965 && TARGET_HARD_FLOAT
8966 && outer == SET
8967 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8968 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8969 *total = COSTS_N_INSNS (1);
8970 else
8971 *total = COSTS_N_INSNS (4);
8972 return true;
8974 default:
8975 *total = COSTS_N_INSNS (4);
8976 return false;
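Illustrative aside (not part of arm.c): the values returned by these routines are in the units produced by COSTS_N_INSNS, which (an assumption, mirroring GCC's rtl.h) scales an instruction count by 4; the bare "+ 1" adjustments seen above are therefore deliberate sub-instruction nudges. A trivial standalone sketch:

#include <stdio.h>

#define COSTS_N_INSNS(N) ((N) * 4)   /* Assumed to match GCC's rtl.h.  */

int
main (void)
{
  printf ("1 insn = %d units, 2 insns plus a nudge = %d units\n",
          COSTS_N_INSNS (1), COSTS_N_INSNS (2) + 1);
  return 0;
}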
8980 /* Estimate the size cost of thumb1 instructions.
8981 For now most of the code is copied from thumb1_rtx_costs. We need more
8982 fine-grained tuning when we have more related test cases. */
8983 static inline int
8984 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8986 machine_mode mode = GET_MODE (x);
8987 int words;
8989 switch (code)
8991 case ASHIFT:
8992 case ASHIFTRT:
8993 case LSHIFTRT:
8994 case ROTATERT:
8995 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8997 case PLUS:
8998 case MINUS:
8999 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9000 patterns generated by RTL expansion, especially for the expansion of
9001 multiplication. */
9002 if ((GET_CODE (XEXP (x, 0)) == MULT
9003 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9004 || (GET_CODE (XEXP (x, 1)) == MULT
9005 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9006 return COSTS_N_INSNS (2);
9007 /* Deliberately fall through for normal RTXs. */
9008 case COMPARE:
9009 case NEG:
9010 case NOT:
9011 return COSTS_N_INSNS (1);
9013 case MULT:
9014 if (CONST_INT_P (XEXP (x, 1)))
9016 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9017 into a register first. */
9018 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9019 /* For targets that have a very small and high-latency multiply
9020 unit, we prefer to synthesize the mult with up to 5 instructions,
9021 giving a good balance between size and performance. */
9022 if (arm_arch6m && arm_m_profile_small_mul)
9023 return COSTS_N_INSNS (5);
9024 else
9025 return COSTS_N_INSNS (1) + const_size;
9027 return COSTS_N_INSNS (1);
9029 case SET:
9030 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9031 the mode. */
9032 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9033 return COSTS_N_INSNS (words)
9034 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9035 || satisfies_constraint_K (SET_SRC (x))
9036 /* thumb1_movdi_insn. */
9037 || ((words > 1) && MEM_P (SET_SRC (x))));
9039 case CONST_INT:
9040 if (outer == SET)
9042 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9043 return COSTS_N_INSNS (1);
9044 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9045 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9046 return COSTS_N_INSNS (2);
9047 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9048 if (thumb_shiftable_const (INTVAL (x)))
9049 return COSTS_N_INSNS (2);
9050 return COSTS_N_INSNS (3);
9052 else if ((outer == PLUS || outer == COMPARE)
9053 && INTVAL (x) < 256 && INTVAL (x) > -256)
9054 return 0;
9055 else if ((outer == IOR || outer == XOR || outer == AND)
9056 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9057 return COSTS_N_INSNS (1);
9058 else if (outer == AND)
9060 int i;
9061 /* This duplicates the tests in the andsi3 expander. */
9062 for (i = 9; i <= 31; i++)
9063 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9064 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9065 return COSTS_N_INSNS (2);
9067 else if (outer == ASHIFT || outer == ASHIFTRT
9068 || outer == LSHIFTRT)
9069 return 0;
9070 return COSTS_N_INSNS (2);
9072 case CONST:
9073 case CONST_DOUBLE:
9074 case LABEL_REF:
9075 case SYMBOL_REF:
9076 return COSTS_N_INSNS (3);
9078 case UDIV:
9079 case UMOD:
9080 case DIV:
9081 case MOD:
9082 return 100;
9084 case TRUNCATE:
9085 return 99;
9087 case AND:
9088 case XOR:
9089 case IOR:
9090 return COSTS_N_INSNS (1);
9092 case MEM:
9093 return (COSTS_N_INSNS (1)
9094 + COSTS_N_INSNS (1)
9095 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9096 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9097 ? COSTS_N_INSNS (1) : 0));
9099 case IF_THEN_ELSE:
9100 /* XXX a guess. */
9101 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9102 return 14;
9103 return 2;
9105 case ZERO_EXTEND:
9106 /* XXX still guessing. */
9107 switch (GET_MODE (XEXP (x, 0)))
9109 case QImode:
9110 return (1 + (mode == DImode ? 4 : 0)
9111 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9113 case HImode:
9114 return (4 + (mode == DImode ? 4 : 0)
9115 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9117 case SImode:
9118 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9120 default:
9121 return 99;
9124 default:
9125 return 99;
9129 /* RTX costs when optimizing for size. */
9130 static bool
9131 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9132 int *total)
9134 machine_mode mode = GET_MODE (x);
9135 if (TARGET_THUMB1)
9137 *total = thumb1_size_rtx_costs (x, code, outer_code);
9138 return true;
9141 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9142 switch (code)
9144 case MEM:
9145 /* A memory access costs 1 insn if the mode is small or the address is
9146 a single register; otherwise it costs one insn per word. */
9147 if (REG_P (XEXP (x, 0)))
9148 *total = COSTS_N_INSNS (1);
9149 else if (flag_pic
9150 && GET_CODE (XEXP (x, 0)) == PLUS
9151 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9152 /* This will be split into two instructions.
9153 See arm.md:calculate_pic_address. */
9154 *total = COSTS_N_INSNS (2);
9155 else
9156 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9157 return true;
9159 case DIV:
9160 case MOD:
9161 case UDIV:
9162 case UMOD:
9163 /* Needs a libcall, so it costs about this. */
9164 *total = COSTS_N_INSNS (2);
9165 return false;
9167 case ROTATE:
9168 if (mode == SImode && REG_P (XEXP (x, 1)))
9170 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9171 return true;
9173 /* Fall through */
9174 case ROTATERT:
9175 case ASHIFT:
9176 case LSHIFTRT:
9177 case ASHIFTRT:
9178 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9180 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9181 return true;
9183 else if (mode == SImode)
9185 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9186 /* Slightly disparage register shifts, but not by much. */
9187 if (!CONST_INT_P (XEXP (x, 1)))
9188 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9189 return true;
9192 /* Needs a libcall. */
9193 *total = COSTS_N_INSNS (2);
9194 return false;
9196 case MINUS:
9197 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9198 && (mode == SFmode || !TARGET_VFP_SINGLE))
9200 *total = COSTS_N_INSNS (1);
9201 return false;
9204 if (mode == SImode)
9206 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9207 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9209 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9210 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9211 || subcode1 == ROTATE || subcode1 == ROTATERT
9212 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9213 || subcode1 == ASHIFTRT)
9215 /* It's just the cost of the two operands. */
9216 *total = 0;
9217 return false;
9220 *total = COSTS_N_INSNS (1);
9221 return false;
9224 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9225 return false;
9227 case PLUS:
9228 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9229 && (mode == SFmode || !TARGET_VFP_SINGLE))
9231 *total = COSTS_N_INSNS (1);
9232 return false;
9235 /* A shift as a part of ADD costs nothing. */
9236 if (GET_CODE (XEXP (x, 0)) == MULT
9237 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9239 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9240 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9241 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9242 return true;
9245 /* Fall through */
9246 case AND: case XOR: case IOR:
9247 if (mode == SImode)
9249 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9251 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9252 || subcode == LSHIFTRT || subcode == ASHIFTRT
9253 || (code == AND && subcode == NOT))
9255 /* It's just the cost of the two operands. */
9256 *total = 0;
9257 return false;
9261 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9262 return false;
9264 case MULT:
9265 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9266 return false;
9268 case NEG:
9269 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9270 && (mode == SFmode || !TARGET_VFP_SINGLE))
9272 *total = COSTS_N_INSNS (1);
9273 return false;
9276 /* Fall through */
9277 case NOT:
9278 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9280 return false;
9282 case IF_THEN_ELSE:
9283 *total = 0;
9284 return false;
9286 case COMPARE:
9287 if (cc_register (XEXP (x, 0), VOIDmode))
9288 * total = 0;
9289 else
9290 *total = COSTS_N_INSNS (1);
9291 return false;
9293 case ABS:
9294 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9295 && (mode == SFmode || !TARGET_VFP_SINGLE))
9296 *total = COSTS_N_INSNS (1);
9297 else
9298 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9299 return false;
9301 case SIGN_EXTEND:
9302 case ZERO_EXTEND:
9303 return arm_rtx_costs_1 (x, outer_code, total, 0);
9305 case CONST_INT:
9306 if (const_ok_for_arm (INTVAL (x)))
9307 /* A multiplication by a constant requires another instruction
9308 to load the constant into a register. */
9309 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9310 ? 1 : 0);
9311 else if (const_ok_for_arm (~INTVAL (x)))
9312 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9313 else if (const_ok_for_arm (-INTVAL (x)))
9315 if (outer_code == COMPARE || outer_code == PLUS
9316 || outer_code == MINUS)
9317 *total = 0;
9318 else
9319 *total = COSTS_N_INSNS (1);
9321 else
9322 *total = COSTS_N_INSNS (2);
9323 return true;
9325 case CONST:
9326 case LABEL_REF:
9327 case SYMBOL_REF:
9328 *total = COSTS_N_INSNS (2);
9329 return true;
9331 case CONST_DOUBLE:
9332 *total = COSTS_N_INSNS (4);
9333 return true;
9335 case CONST_VECTOR:
9336 if (TARGET_NEON
9337 && TARGET_HARD_FLOAT
9338 && outer_code == SET
9339 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9340 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9341 *total = COSTS_N_INSNS (1);
9342 else
9343 *total = COSTS_N_INSNS (4);
9344 return true;
9346 case HIGH:
9347 case LO_SUM:
9348 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9349 cost of these slightly. */
9350 *total = COSTS_N_INSNS (1) + 1;
9351 return true;
9353 case SET:
9354 return false;
9356 default:
9357 if (mode != VOIDmode)
9358 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9359 else
9360 *total = COSTS_N_INSNS (4); /* Who knows? */
9361 return false;
9365 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9366 operand, then return the operand that is being shifted. If the shift
9367 is not by a constant, then set SHIFT_REG to point to the operand.
9368 Return NULL if OP is not a shifter operand. */
9369 static rtx
9370 shifter_op_p (rtx op, rtx *shift_reg)
9372 enum rtx_code code = GET_CODE (op);
9374 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9375 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9376 return XEXP (op, 0);
9377 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9378 return XEXP (op, 0);
9379 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9380 || code == ASHIFTRT)
9382 if (!CONST_INT_P (XEXP (op, 1)))
9383 *shift_reg = XEXP (op, 1);
9384 return XEXP (op, 0);
9387 return NULL;
9390 static bool
9391 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9393 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9394 gcc_assert (GET_CODE (x) == UNSPEC);
9396 switch (XINT (x, 1))
9398 case UNSPEC_UNALIGNED_LOAD:
9399 /* We can only do unaligned loads into the integer unit, and we can't
9400 use LDM or LDRD. */
9401 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9402 if (speed_p)
9403 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9404 + extra_cost->ldst.load_unaligned);
9406 #ifdef NOT_YET
9407 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9408 ADDR_SPACE_GENERIC, speed_p);
9409 #endif
9410 return true;
9412 case UNSPEC_UNALIGNED_STORE:
9413 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9414 if (speed_p)
9415 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9416 + extra_cost->ldst.store_unaligned);
9418 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9419 #ifdef NOT_YET
9420 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9421 ADDR_SPACE_GENERIC, speed_p);
9422 #endif
9423 return true;
9425 case UNSPEC_VRINTZ:
9426 case UNSPEC_VRINTP:
9427 case UNSPEC_VRINTM:
9428 case UNSPEC_VRINTR:
9429 case UNSPEC_VRINTX:
9430 case UNSPEC_VRINTA:
9431 *cost = COSTS_N_INSNS (1);
9432 if (speed_p)
9433 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9435 return true;
9436 default:
9437 *cost = COSTS_N_INSNS (2);
9438 break;
9440 return false;
9443 /* Cost of a libcall. We assume one insn per argument, an amount for the
9444 call (one insn for -Os) and then one for processing the result. */
9445 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
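/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (2 + 18), i.e. 20 insns'
   worth of cost, when optimizing for speed, but only COSTS_N_INSNS (2 + 2)
   when optimizing for size.  */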
9447 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9448 do \
9450 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9451 if (shift_op != NULL \
9452 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9454 if (shift_reg) \
9456 if (speed_p) \
9457 *cost += extra_cost->alu.arith_shift_reg; \
9458 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9460 else if (speed_p) \
9461 *cost += extra_cost->alu.arith_shift; \
9463 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9464 + rtx_cost (XEXP (x, 1 - IDX), \
9465 OP, 1, speed_p)); \
9466 return true; \
9469 while (0);
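/* The macro above handles arithmetic on narrow (QImode/HImode) values when
   operand IDX of X is a left shift (as checked by arm_rtx_shift_left_p):
   when optimizing for speed it charges the tuning-specific arith_shift or
   arith_shift_reg cost, always adds the costs of the shifted operand, of a
   register shift amount (if any) and of the other operand, and then makes
   the enclosing switch case return.  */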
9471 /* RTX costs. Make an estimate of the cost of executing the operation
9472 X, which is contained with an operation with code OUTER_CODE.
9473 SPEED_P indicates whether the cost desired is the performance cost,
9474 or the size cost. The estimate is stored in COST and the return
9475 value is TRUE if the cost calculation is final, or FALSE if the
9476 caller should recurse through the operands of X to add additional
9477 costs.
9479 We currently make no attempt to model the size savings of Thumb-2
9480 16-bit instructions. At the normal points in compilation where
9481 this code is called we have no measure of whether the condition
9482 flags are live or not, and thus no realistic way to determine what
9483 the size will eventually be. */
9484 static bool
9485 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9486 const struct cpu_cost_table *extra_cost,
9487 int *cost, bool speed_p)
9489 machine_mode mode = GET_MODE (x);
9491 if (TARGET_THUMB1)
9493 if (speed_p)
9494 *cost = thumb1_rtx_costs (x, code, outer_code);
9495 else
9496 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9497 return true;
9500 switch (code)
9502 case SET:
9503 *cost = 0;
9504 /* SET RTXs don't have a mode so we get it from the destination. */
9505 mode = GET_MODE (SET_DEST (x));
9507 if (REG_P (SET_SRC (x))
9508 && REG_P (SET_DEST (x)))
9510 /* Assume that most copies can be done with a single insn,
9511 unless we don't have HW FP, in which case everything
9512 larger than word mode will require two insns. */
9513 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9514 && GET_MODE_SIZE (mode) > 4)
9515 || mode == DImode)
9516 ? 2 : 1);
9517 /* Conditional register moves can be encoded
9518 in 16 bits in Thumb mode. */
9519 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9520 *cost >>= 1;
9522 return true;
9525 if (CONST_INT_P (SET_SRC (x)))
9527 /* Handle CONST_INT here, since the value doesn't have a mode
9528 and we would otherwise be unable to work out the true cost. */
9529 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9530 outer_code = SET;
9531 /* Slightly lower the cost of setting a core reg to a constant.
9532 This helps break up chains and allows for better scheduling. */
9533 if (REG_P (SET_DEST (x))
9534 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9535 *cost -= 1;
9536 x = SET_SRC (x);
9537 /* Immediate moves with an immediate in the range [0, 255] can be
9538 encoded in 16 bits in Thumb mode. */
9539 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9540 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9541 *cost >>= 1;
9542 goto const_int_cost;
9545 return false;
9547 case MEM:
9548 /* A memory access costs 1 insn if the mode is small, or the address is
9549 a single register, otherwise it costs one insn per word. */
9550 if (REG_P (XEXP (x, 0)))
9551 *cost = COSTS_N_INSNS (1);
9552 else if (flag_pic
9553 && GET_CODE (XEXP (x, 0)) == PLUS
9554 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9555 /* This will be split into two instructions.
9556 See arm.md:calculate_pic_address. */
9557 *cost = COSTS_N_INSNS (2);
9558 else
9559 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9561 /* For speed optimizations, add the costs of the address and
9562 accessing memory. */
9563 if (speed_p)
9564 #ifdef NOT_YET
9565 *cost += (extra_cost->ldst.load
9566 + arm_address_cost (XEXP (x, 0), mode,
9567 ADDR_SPACE_GENERIC, speed_p));
9568 #else
9569 *cost += extra_cost->ldst.load;
9570 #endif
9571 return true;
9573 case PARALLEL:
9575 /* Calculations of LDM costs are complex. We assume an initial cost
9576 (ldm_1st) which will load the number of registers mentioned in
9577 ldm_regs_per_insn_1st registers; then each additional
9578 ldm_regs_per_insn_subsequent registers cost one more insn. The
9579 formula for N regs is thus:
9581 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9582 + ldm_regs_per_insn_subsequent - 1)
9583 / ldm_regs_per_insn_subsequent).
9585 Additional costs may also be added for addressing. A similar
9586 formula is used for STM. */
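/* As a worked example, with a hypothetical tuning where
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an LDM
   of 5 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */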
9588 bool is_ldm = load_multiple_operation (x, SImode);
9589 bool is_stm = store_multiple_operation (x, SImode);
9591 *cost = COSTS_N_INSNS (1);
9593 if (is_ldm || is_stm)
9595 if (speed_p)
9597 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9598 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9599 ? extra_cost->ldst.ldm_regs_per_insn_1st
9600 : extra_cost->ldst.stm_regs_per_insn_1st;
9601 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9602 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9603 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9605 *cost += regs_per_insn_1st
9606 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9607 + regs_per_insn_sub - 1)
9608 / regs_per_insn_sub);
9609 return true;
9613 return false;
9615 case DIV:
9616 case UDIV:
9617 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9618 && (mode == SFmode || !TARGET_VFP_SINGLE))
9619 *cost = COSTS_N_INSNS (speed_p
9620 ? extra_cost->fp[mode != SFmode].div : 1);
9621 else if (mode == SImode && TARGET_IDIV)
9622 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9623 else
9624 *cost = LIBCALL_COST (2);
9625 return false; /* All arguments must be in registers. */
9627 case MOD:
9628 case UMOD:
9629 *cost = LIBCALL_COST (2);
9630 return false; /* All arguments must be in registers. */
9632 case ROTATE:
9633 if (mode == SImode && REG_P (XEXP (x, 1)))
9635 *cost = (COSTS_N_INSNS (2)
9636 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9637 if (speed_p)
9638 *cost += extra_cost->alu.shift_reg;
9639 return true;
9641 /* Fall through */
9642 case ROTATERT:
9643 case ASHIFT:
9644 case LSHIFTRT:
9645 case ASHIFTRT:
9646 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9648 *cost = (COSTS_N_INSNS (3)
9649 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9650 if (speed_p)
9651 *cost += 2 * extra_cost->alu.shift;
9652 return true;
9654 else if (mode == SImode)
9656 *cost = (COSTS_N_INSNS (1)
9657 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9658 /* Slightly disparage register shifts at -Os, but not by much. */
9659 if (!CONST_INT_P (XEXP (x, 1)))
9660 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9661 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9662 return true;
9664 else if (GET_MODE_CLASS (mode) == MODE_INT
9665 && GET_MODE_SIZE (mode) < 4)
9667 if (code == ASHIFT)
9669 *cost = (COSTS_N_INSNS (1)
9670 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9671 /* Slightly disparage register shifts at -Os, but not by
9672 much. */
9673 if (!CONST_INT_P (XEXP (x, 1)))
9674 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9675 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9677 else if (code == LSHIFTRT || code == ASHIFTRT)
9679 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9681 /* Can use SBFX/UBFX. */
9682 *cost = COSTS_N_INSNS (1);
9683 if (speed_p)
9684 *cost += extra_cost->alu.bfx;
9685 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9687 else
9689 *cost = COSTS_N_INSNS (2);
9690 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9691 if (speed_p)
9693 if (CONST_INT_P (XEXP (x, 1)))
9694 *cost += 2 * extra_cost->alu.shift;
9695 else
9696 *cost += (extra_cost->alu.shift
9697 + extra_cost->alu.shift_reg);
9699 else
9700 /* Slightly disparage register shifts. */
9701 *cost += !CONST_INT_P (XEXP (x, 1));
9704 else /* Rotates. */
9706 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9707 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9708 if (speed_p)
9710 if (CONST_INT_P (XEXP (x, 1)))
9711 *cost += (2 * extra_cost->alu.shift
9712 + extra_cost->alu.log_shift);
9713 else
9714 *cost += (extra_cost->alu.shift
9715 + extra_cost->alu.shift_reg
9716 + extra_cost->alu.log_shift_reg);
9719 return true;
9722 *cost = LIBCALL_COST (2);
9723 return false;
9725 case BSWAP:
9726 if (arm_arch6)
9728 if (mode == SImode)
9730 *cost = COSTS_N_INSNS (1);
9731 if (speed_p)
9732 *cost += extra_cost->alu.rev;
9734 return false;
9737 else
9739 /* No rev instruction available. Look at arm_legacy_rev
9740 and thumb_legacy_rev for the form of RTL used then. */
9741 if (TARGET_THUMB)
9743 *cost = COSTS_N_INSNS (10);
9745 if (speed_p)
9747 *cost += 6 * extra_cost->alu.shift;
9748 *cost += 3 * extra_cost->alu.logical;
9751 else
9753 *cost = COSTS_N_INSNS (5);
9755 if (speed_p)
9757 *cost += 2 * extra_cost->alu.shift;
9758 *cost += extra_cost->alu.arith_shift;
9759 *cost += 2 * extra_cost->alu.logical;
9762 return true;
9764 return false;
9766 case MINUS:
9767 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9768 && (mode == SFmode || !TARGET_VFP_SINGLE))
9770 *cost = COSTS_N_INSNS (1);
9771 if (GET_CODE (XEXP (x, 0)) == MULT
9772 || GET_CODE (XEXP (x, 1)) == MULT)
9774 rtx mul_op0, mul_op1, sub_op;
9776 if (speed_p)
9777 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9779 if (GET_CODE (XEXP (x, 0)) == MULT)
9781 mul_op0 = XEXP (XEXP (x, 0), 0);
9782 mul_op1 = XEXP (XEXP (x, 0), 1);
9783 sub_op = XEXP (x, 1);
9785 else
9787 mul_op0 = XEXP (XEXP (x, 1), 0);
9788 mul_op1 = XEXP (XEXP (x, 1), 1);
9789 sub_op = XEXP (x, 0);
9792 /* The first operand of the multiply may be optionally
9793 negated. */
9794 if (GET_CODE (mul_op0) == NEG)
9795 mul_op0 = XEXP (mul_op0, 0);
9797 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9798 + rtx_cost (mul_op1, code, 0, speed_p)
9799 + rtx_cost (sub_op, code, 0, speed_p));
9801 return true;
9804 if (speed_p)
9805 *cost += extra_cost->fp[mode != SFmode].addsub;
9806 return false;
9809 if (mode == SImode)
9811 rtx shift_by_reg = NULL;
9812 rtx shift_op;
9813 rtx non_shift_op;
9815 *cost = COSTS_N_INSNS (1);
9817 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9818 if (shift_op == NULL)
9820 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9821 non_shift_op = XEXP (x, 0);
9823 else
9824 non_shift_op = XEXP (x, 1);
9826 if (shift_op != NULL)
9828 if (shift_by_reg != NULL)
9830 if (speed_p)
9831 *cost += extra_cost->alu.arith_shift_reg;
9832 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9834 else if (speed_p)
9835 *cost += extra_cost->alu.arith_shift;
9837 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9838 + rtx_cost (non_shift_op, code, 0, speed_p));
9839 return true;
9842 if (arm_arch_thumb2
9843 && GET_CODE (XEXP (x, 1)) == MULT)
9845 /* MLS. */
9846 if (speed_p)
9847 *cost += extra_cost->mult[0].add;
9848 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9849 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9850 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9851 return true;
9854 if (CONST_INT_P (XEXP (x, 0)))
9856 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9857 INTVAL (XEXP (x, 0)), NULL_RTX,
9858 NULL_RTX, 1, 0);
9859 *cost = COSTS_N_INSNS (insns);
9860 if (speed_p)
9861 *cost += insns * extra_cost->alu.arith;
9862 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9863 return true;
9865 else if (speed_p)
9866 *cost += extra_cost->alu.arith;
9868 return false;
9871 if (GET_MODE_CLASS (mode) == MODE_INT
9872 && GET_MODE_SIZE (mode) < 4)
9874 rtx shift_op, shift_reg;
9875 shift_reg = NULL;
9877 /* We check both sides of the MINUS for shifter operands since,
9878 unlike PLUS, it's not commutative. */
9880 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9881 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9883 /* Slightly disparage, as we might need to widen the result. */
9884 *cost = 1 + COSTS_N_INSNS (1);
9885 if (speed_p)
9886 *cost += extra_cost->alu.arith;
9888 if (CONST_INT_P (XEXP (x, 0)))
9890 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9891 return true;
9894 return false;
9897 if (mode == DImode)
9899 *cost = COSTS_N_INSNS (2);
9901 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9903 rtx op1 = XEXP (x, 1);
9905 if (speed_p)
9906 *cost += 2 * extra_cost->alu.arith;
9908 if (GET_CODE (op1) == ZERO_EXTEND)
9909 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9910 else
9911 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9912 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9913 0, speed_p);
9914 return true;
9916 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9918 if (speed_p)
9919 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9920 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9921 0, speed_p)
9922 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9923 return true;
9925 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9926 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9928 if (speed_p)
9929 *cost += (extra_cost->alu.arith
9930 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9931 ? extra_cost->alu.arith
9932 : extra_cost->alu.arith_shift));
9933 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9934 + rtx_cost (XEXP (XEXP (x, 1), 0),
9935 GET_CODE (XEXP (x, 1)), 0, speed_p));
9936 return true;
9939 if (speed_p)
9940 *cost += 2 * extra_cost->alu.arith;
9941 return false;
9944 /* Vector mode? */
9946 *cost = LIBCALL_COST (2);
9947 return false;
9949 case PLUS:
9950 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9951 && (mode == SFmode || !TARGET_VFP_SINGLE))
9953 *cost = COSTS_N_INSNS (1);
9954 if (GET_CODE (XEXP (x, 0)) == MULT)
9956 rtx mul_op0, mul_op1, add_op;
9958 if (speed_p)
9959 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9961 mul_op0 = XEXP (XEXP (x, 0), 0);
9962 mul_op1 = XEXP (XEXP (x, 0), 1);
9963 add_op = XEXP (x, 1);
9965 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9966 + rtx_cost (mul_op1, code, 0, speed_p)
9967 + rtx_cost (add_op, code, 0, speed_p));
9969 return true;
9972 if (speed_p)
9973 *cost += extra_cost->fp[mode != SFmode].addsub;
9974 return false;
9976 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9978 *cost = LIBCALL_COST (2);
9979 return false;
9982 /* Narrow modes can be synthesized in SImode, but the range
9983 of useful sub-operations is limited. Check for shift operations
9984 on one of the operands. Only left shifts can be used in the
9985 narrow modes. */
9986 if (GET_MODE_CLASS (mode) == MODE_INT
9987 && GET_MODE_SIZE (mode) < 4)
9989 rtx shift_op, shift_reg;
9990 shift_reg = NULL;
9992 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9994 if (CONST_INT_P (XEXP (x, 1)))
9996 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9997 INTVAL (XEXP (x, 1)), NULL_RTX,
9998 NULL_RTX, 1, 0);
9999 *cost = COSTS_N_INSNS (insns);
10000 if (speed_p)
10001 *cost += insns * extra_cost->alu.arith;
10002 /* Slightly penalize a narrow operation as the result may
10003 need widening. */
10004 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10005 return true;
10008 /* Slightly penalize a narrow operation as the result may
10009 need widening. */
10010 *cost = 1 + COSTS_N_INSNS (1);
10011 if (speed_p)
10012 *cost += extra_cost->alu.arith;
10014 return false;
10017 if (mode == SImode)
10019 rtx shift_op, shift_reg;
10021 *cost = COSTS_N_INSNS (1);
10022 if (TARGET_INT_SIMD
10023 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10024 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10026 /* UXTA[BH] or SXTA[BH]. */
10027 if (speed_p)
10028 *cost += extra_cost->alu.extend_arith;
10029 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10030 speed_p)
10031 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
10032 return true;
10035 shift_reg = NULL;
10036 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10037 if (shift_op != NULL)
10039 if (shift_reg)
10041 if (speed_p)
10042 *cost += extra_cost->alu.arith_shift_reg;
10043 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10045 else if (speed_p)
10046 *cost += extra_cost->alu.arith_shift;
10048 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10049 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10050 return true;
10052 if (GET_CODE (XEXP (x, 0)) == MULT)
10054 rtx mul_op = XEXP (x, 0);
10056 *cost = COSTS_N_INSNS (1);
10058 if (TARGET_DSP_MULTIPLY
10059 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10060 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10061 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10062 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10063 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10064 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10065 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10066 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10067 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10068 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10069 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10070 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10071 == 16))))))
10073 /* SMLA[BT][BT]. */
10074 if (speed_p)
10075 *cost += extra_cost->mult[0].extend_add;
10076 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10077 SIGN_EXTEND, 0, speed_p)
10078 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10079 SIGN_EXTEND, 0, speed_p)
10080 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10081 return true;
10084 if (speed_p)
10085 *cost += extra_cost->mult[0].add;
10086 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10087 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10088 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10089 return true;
10091 if (CONST_INT_P (XEXP (x, 1)))
10093 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10094 INTVAL (XEXP (x, 1)), NULL_RTX,
10095 NULL_RTX, 1, 0);
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.arith;
10099 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10100 return true;
10102 else if (speed_p)
10103 *cost += extra_cost->alu.arith;
10105 return false;
10108 if (mode == DImode)
10110 if (arm_arch3m
10111 && GET_CODE (XEXP (x, 0)) == MULT
10112 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10113 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10114 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10115 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10117 *cost = COSTS_N_INSNS (1);
10118 if (speed_p)
10119 *cost += extra_cost->mult[1].extend_add;
10120 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10121 ZERO_EXTEND, 0, speed_p)
10122 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10123 ZERO_EXTEND, 0, speed_p)
10124 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10125 return true;
10128 *cost = COSTS_N_INSNS (2);
10130 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10131 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10133 if (speed_p)
10134 *cost += (extra_cost->alu.arith
10135 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10136 ? extra_cost->alu.arith
10137 : extra_cost->alu.arith_shift));
10139 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10140 speed_p)
10141 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10142 return true;
10145 if (speed_p)
10146 *cost += 2 * extra_cost->alu.arith;
10147 return false;
10150 /* Vector mode? */
10151 *cost = LIBCALL_COST (2);
10152 return false;
10153 case IOR:
10154 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10156 *cost = COSTS_N_INSNS (1);
10157 if (speed_p)
10158 *cost += extra_cost->alu.rev;
10160 return true;
10162 /* Fall through. */
10163 case AND: case XOR:
10164 if (mode == SImode)
10166 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10167 rtx op0 = XEXP (x, 0);
10168 rtx shift_op, shift_reg;
10170 *cost = COSTS_N_INSNS (1);
10172 if (subcode == NOT
10173 && (code == AND
10174 || (code == IOR && TARGET_THUMB2)))
10175 op0 = XEXP (op0, 0);
10177 shift_reg = NULL;
10178 shift_op = shifter_op_p (op0, &shift_reg);
10179 if (shift_op != NULL)
10181 if (shift_reg)
10183 if (speed_p)
10184 *cost += extra_cost->alu.log_shift_reg;
10185 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10187 else if (speed_p)
10188 *cost += extra_cost->alu.log_shift;
10190 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10191 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10192 return true;
10195 if (CONST_INT_P (XEXP (x, 1)))
10197 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10198 INTVAL (XEXP (x, 1)), NULL_RTX,
10199 NULL_RTX, 1, 0);
10201 *cost = COSTS_N_INSNS (insns);
10202 if (speed_p)
10203 *cost += insns * extra_cost->alu.logical;
10204 *cost += rtx_cost (op0, code, 0, speed_p);
10205 return true;
10208 if (speed_p)
10209 *cost += extra_cost->alu.logical;
10210 *cost += (rtx_cost (op0, code, 0, speed_p)
10211 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10212 return true;
10215 if (mode == DImode)
10217 rtx op0 = XEXP (x, 0);
10218 enum rtx_code subcode = GET_CODE (op0);
10220 *cost = COSTS_N_INSNS (2);
10222 if (subcode == NOT
10223 && (code == AND
10224 || (code == IOR && TARGET_THUMB2)))
10225 op0 = XEXP (op0, 0);
10227 if (GET_CODE (op0) == ZERO_EXTEND)
10229 if (speed_p)
10230 *cost += 2 * extra_cost->alu.logical;
10232 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10233 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10234 return true;
10236 else if (GET_CODE (op0) == SIGN_EXTEND)
10238 if (speed_p)
10239 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10241 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10242 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10243 return true;
10246 if (speed_p)
10247 *cost += 2 * extra_cost->alu.logical;
10249 return true;
10251 /* Vector mode? */
10253 *cost = LIBCALL_COST (2);
10254 return false;
10256 case MULT:
10257 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10258 && (mode == SFmode || !TARGET_VFP_SINGLE))
10260 rtx op0 = XEXP (x, 0);
10262 *cost = COSTS_N_INSNS (1);
10264 if (GET_CODE (op0) == NEG)
10265 op0 = XEXP (op0, 0);
10267 if (speed_p)
10268 *cost += extra_cost->fp[mode != SFmode].mult;
10270 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10271 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10272 return true;
10274 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10276 *cost = LIBCALL_COST (2);
10277 return false;
10280 if (mode == SImode)
10282 *cost = COSTS_N_INSNS (1);
10283 if (TARGET_DSP_MULTIPLY
10284 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10285 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10286 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10287 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10288 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10289 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10290 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10291 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10292 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10293 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10294 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10295 && (INTVAL (XEXP (XEXP (x, 1), 1))
10296 == 16))))))
10298 /* SMUL[TB][TB]. */
10299 if (speed_p)
10300 *cost += extra_cost->mult[0].extend;
10301 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10302 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10303 return true;
10305 if (speed_p)
10306 *cost += extra_cost->mult[0].simple;
10307 return false;
10310 if (mode == DImode)
10312 if (arm_arch3m
10313 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10314 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10315 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10316 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10318 *cost = COSTS_N_INSNS (1);
10319 if (speed_p)
10320 *cost += extra_cost->mult[1].extend;
10321 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10322 ZERO_EXTEND, 0, speed_p)
10323 + rtx_cost (XEXP (XEXP (x, 1), 0),
10324 ZERO_EXTEND, 0, speed_p));
10325 return true;
10328 *cost = LIBCALL_COST (2);
10329 return false;
10332 /* Vector mode? */
10333 *cost = LIBCALL_COST (2);
10334 return false;
10336 case NEG:
10337 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10338 && (mode == SFmode || !TARGET_VFP_SINGLE))
10340 *cost = COSTS_N_INSNS (1);
10341 if (speed_p)
10342 *cost += extra_cost->fp[mode != SFmode].neg;
10344 return false;
10346 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10348 *cost = LIBCALL_COST (1);
10349 return false;
10352 if (mode == SImode)
10354 if (GET_CODE (XEXP (x, 0)) == ABS)
10356 *cost = COSTS_N_INSNS (2);
10357 /* Assume the non-flag-changing variant. */
10358 if (speed_p)
10359 *cost += (extra_cost->alu.log_shift
10360 + extra_cost->alu.arith_shift);
10361 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10362 return true;
10365 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10366 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10368 *cost = COSTS_N_INSNS (2);
10369 /* No extra cost for MOV imm and MVN imm. */
10370 /* If the comparison op is using the flags, there's no further
10371 cost, otherwise we need to add the cost of the comparison. */
10372 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10373 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10374 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10376 *cost += (COSTS_N_INSNS (1)
10377 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10378 speed_p)
10379 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10380 speed_p));
10381 if (speed_p)
10382 *cost += extra_cost->alu.arith;
10384 return true;
10386 *cost = COSTS_N_INSNS (1);
10387 if (speed_p)
10388 *cost += extra_cost->alu.arith;
10389 return false;
10392 if (GET_MODE_CLASS (mode) == MODE_INT
10393 && GET_MODE_SIZE (mode) < 4)
10395 /* Slightly disparage, as we might need an extend operation. */
10396 *cost = 1 + COSTS_N_INSNS (1);
10397 if (speed_p)
10398 *cost += extra_cost->alu.arith;
10399 return false;
10402 if (mode == DImode)
10404 *cost = COSTS_N_INSNS (2);
10405 if (speed_p)
10406 *cost += 2 * extra_cost->alu.arith;
10407 return false;
10410 /* Vector mode? */
10411 *cost = LIBCALL_COST (1);
10412 return false;
10414 case NOT:
10415 if (mode == SImode)
10417 rtx shift_op;
10418 rtx shift_reg = NULL;
10420 *cost = COSTS_N_INSNS (1);
10421 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10423 if (shift_op)
10425 if (shift_reg != NULL)
10427 if (speed_p)
10428 *cost += extra_cost->alu.log_shift_reg;
10429 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10431 else if (speed_p)
10432 *cost += extra_cost->alu.log_shift;
10433 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10434 return true;
10437 if (speed_p)
10438 *cost += extra_cost->alu.logical;
10439 return false;
10441 if (mode == DImode)
10443 *cost = COSTS_N_INSNS (2);
10444 return false;
10447 /* Vector mode? */
10449 *cost += LIBCALL_COST (1);
10450 return false;
10452 case IF_THEN_ELSE:
10454 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10456 *cost = COSTS_N_INSNS (4);
10457 return true;
10459 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10460 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10462 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10463 /* Assume that if one arm of the if_then_else is a register,
10464 that it will be tied with the result and eliminate the
10465 conditional insn. */
10466 if (REG_P (XEXP (x, 1)))
10467 *cost += op2cost;
10468 else if (REG_P (XEXP (x, 2)))
10469 *cost += op1cost;
10470 else
10472 if (speed_p)
10474 if (extra_cost->alu.non_exec_costs_exec)
10475 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10476 else
10477 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10479 else
10480 *cost += op1cost + op2cost;
10483 return true;
10485 case COMPARE:
10486 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10487 *cost = 0;
10488 else
10490 machine_mode op0mode;
10491 /* We'll mostly assume that the cost of a compare is the cost of the
10492 LHS. However, there are some notable exceptions. */
10494 /* Floating point compares are never done as side-effects. */
10495 op0mode = GET_MODE (XEXP (x, 0));
10496 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10497 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10499 *cost = COSTS_N_INSNS (1);
10500 if (speed_p)
10501 *cost += extra_cost->fp[op0mode != SFmode].compare;
10503 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10505 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10506 return true;
10509 return false;
10511 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10513 *cost = LIBCALL_COST (2);
10514 return false;
10517 /* DImode compares normally take two insns. */
10518 if (op0mode == DImode)
10520 *cost = COSTS_N_INSNS (2);
10521 if (speed_p)
10522 *cost += 2 * extra_cost->alu.arith;
10523 return false;
10526 if (op0mode == SImode)
10528 rtx shift_op;
10529 rtx shift_reg;
10531 if (XEXP (x, 1) == const0_rtx
10532 && !(REG_P (XEXP (x, 0))
10533 || (GET_CODE (XEXP (x, 0)) == SUBREG
10534 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10536 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10538 /* Multiply operations that set the flags are often
10539 significantly more expensive. */
10540 if (speed_p
10541 && GET_CODE (XEXP (x, 0)) == MULT
10542 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10543 *cost += extra_cost->mult[0].flag_setting;
10545 if (speed_p
10546 && GET_CODE (XEXP (x, 0)) == PLUS
10547 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10548 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10549 0), 1), mode))
10550 *cost += extra_cost->mult[0].flag_setting;
10551 return true;
10554 shift_reg = NULL;
10555 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10556 if (shift_op != NULL)
10558 *cost = COSTS_N_INSNS (1);
10559 if (shift_reg != NULL)
10561 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10562 if (speed_p)
10563 *cost += extra_cost->alu.arith_shift_reg;
10565 else if (speed_p)
10566 *cost += extra_cost->alu.arith_shift;
10567 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10568 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10569 return true;
10572 *cost = COSTS_N_INSNS (1);
10573 if (speed_p)
10574 *cost += extra_cost->alu.arith;
10575 if (CONST_INT_P (XEXP (x, 1))
10576 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10578 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10579 return true;
10581 return false;
10584 /* Vector mode? */
10586 *cost = LIBCALL_COST (2);
10587 return false;
10589 return true;
10591 case EQ:
10592 case NE:
10593 case LT:
10594 case LE:
10595 case GT:
10596 case GE:
10597 case LTU:
10598 case LEU:
10599 case GEU:
10600 case GTU:
10601 case ORDERED:
10602 case UNORDERED:
10603 case UNEQ:
10604 case UNLE:
10605 case UNLT:
10606 case UNGE:
10607 case UNGT:
10608 case LTGT:
10609 if (outer_code == SET)
10611 /* Is it a store-flag operation? */
10612 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10613 && XEXP (x, 1) == const0_rtx)
10615 /* Thumb also needs an IT insn. */
10616 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10617 return true;
10619 if (XEXP (x, 1) == const0_rtx)
10621 switch (code)
10623 case LT:
10624 /* LSR Rd, Rn, #31. */
10625 *cost = COSTS_N_INSNS (1);
10626 if (speed_p)
10627 *cost += extra_cost->alu.shift;
10628 break;
10630 case EQ:
10631 /* RSBS T1, Rn, #0
10632 ADC Rd, Rn, T1. */
10634 case NE:
10635 /* SUBS T1, Rn, #1
10636 SBC Rd, Rn, T1. */
10637 *cost = COSTS_N_INSNS (2);
10638 break;
10640 case LE:
10641 /* RSBS T1, Rn, Rn, LSR #31
10642 ADC Rd, Rn, T1. */
10643 *cost = COSTS_N_INSNS (2);
10644 if (speed_p)
10645 *cost += extra_cost->alu.arith_shift;
10646 break;
10648 case GT:
10649 /* RSB Rd, Rn, Rn, ASR #1
10650 LSR Rd, Rd, #31. */
10651 *cost = COSTS_N_INSNS (2);
10652 if (speed_p)
10653 *cost += (extra_cost->alu.arith_shift
10654 + extra_cost->alu.shift);
10655 break;
10657 case GE:
10658 /* ASR Rd, Rn, #31
10659 ADD Rd, Rd, #1. */
10660 *cost = COSTS_N_INSNS (2);
10661 if (speed_p)
10662 *cost += extra_cost->alu.shift;
10663 break;
10665 default:
10666 /* Remaining cases are either meaningless or would take
10667 three insns anyway. */
10668 *cost = COSTS_N_INSNS (3);
10669 break;
10671 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10672 return true;
10674 else
10676 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10677 if (CONST_INT_P (XEXP (x, 1))
10678 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10680 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10681 return true;
10684 return false;
10687 /* Not directly inside a set. If it involves the condition code
10688 register it must be the condition for a branch, cond_exec or
10689 I_T_E operation. Since the comparison is performed elsewhere
10690 this is just the control part which has no additional
10691 cost. */
10692 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10693 && XEXP (x, 1) == const0_rtx)
10695 *cost = 0;
10696 return true;
10698 return false;
10700 case ABS:
10701 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10702 && (mode == SFmode || !TARGET_VFP_SINGLE))
10704 *cost = COSTS_N_INSNS (1);
10705 if (speed_p)
10706 *cost += extra_cost->fp[mode != SFmode].neg;
10708 return false;
10710 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10712 *cost = LIBCALL_COST (1);
10713 return false;
10716 if (mode == SImode)
10718 *cost = COSTS_N_INSNS (1);
10719 if (speed_p)
10720 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10721 return false;
10723 /* Vector mode? */
10724 *cost = LIBCALL_COST (1);
10725 return false;
10727 case SIGN_EXTEND:
10728 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10729 && MEM_P (XEXP (x, 0)))
10731 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10733 if (mode == DImode)
10734 *cost += COSTS_N_INSNS (1);
10736 if (!speed_p)
10737 return true;
10739 if (GET_MODE (XEXP (x, 0)) == SImode)
10740 *cost += extra_cost->ldst.load;
10741 else
10742 *cost += extra_cost->ldst.load_sign_extend;
10744 if (mode == DImode)
10745 *cost += extra_cost->alu.shift;
10747 return true;
10750 /* Widening from less than 32-bits requires an extend operation. */
10751 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10753 /* We have SXTB/SXTH. */
10754 *cost = COSTS_N_INSNS (1);
10755 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10756 if (speed_p)
10757 *cost += extra_cost->alu.extend;
10759 else if (GET_MODE (XEXP (x, 0)) != SImode)
10761 /* Needs two shifts. */
10762 *cost = COSTS_N_INSNS (2);
10763 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10764 if (speed_p)
10765 *cost += 2 * extra_cost->alu.shift;
10768 /* Widening beyond 32-bits requires one more insn. */
10769 if (mode == DImode)
10771 *cost += COSTS_N_INSNS (1);
10772 if (speed_p)
10773 *cost += extra_cost->alu.shift;
10776 return true;
10778 case ZERO_EXTEND:
10779 if ((arm_arch4
10780 || GET_MODE (XEXP (x, 0)) == SImode
10781 || GET_MODE (XEXP (x, 0)) == QImode)
10782 && MEM_P (XEXP (x, 0)))
10784 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10786 if (mode == DImode)
10787 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10789 return true;
10792 /* Widening from less than 32-bits requires an extend operation. */
10793 if (GET_MODE (XEXP (x, 0)) == QImode)
10795 /* UXTB can be a shorter instruction in Thumb2, but it might
10796 be slower than the AND Rd, Rn, #255 alternative. When
10797 optimizing for speed it should never be slower to use
10798 AND, and we don't really model 16-bit vs 32-bit insns
10799 here. */
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->alu.logical;
10804 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10806 /* We have UXTB/UXTH. */
10807 *cost = COSTS_N_INSNS (1);
10808 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10809 if (speed_p)
10810 *cost += extra_cost->alu.extend;
10812 else if (GET_MODE (XEXP (x, 0)) != SImode)
10814 /* Needs two shifts. It's marginally preferable to use
10815 shifts rather than two BIC instructions as the second
10816 shift may merge with a subsequent insn as a shifter
10817 op. */
10818 *cost = COSTS_N_INSNS (2);
10819 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10820 if (speed_p)
10821 *cost += 2 * extra_cost->alu.shift;
10823 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10824 *cost = COSTS_N_INSNS (1);
10826 /* Widening beyond 32-bits requires one more insn. */
10827 if (mode == DImode)
10829 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10832 return true;
10834 case CONST_INT:
10835 *cost = 0;
10836 /* CONST_INT has no mode, so we cannot tell for sure how many
10837 insns are really going to be needed. The best we can do is
10838 look at the value passed. If it fits in SImode, then assume
10839 that's the mode it will be used for. Otherwise assume it
10840 will be used in DImode. */
10841 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10842 mode = SImode;
10843 else
10844 mode = DImode;
10846 /* Avoid blowing up in arm_gen_constant (). */
10847 if (!(outer_code == PLUS
10848 || outer_code == AND
10849 || outer_code == IOR
10850 || outer_code == XOR
10851 || outer_code == MINUS))
10852 outer_code = SET;
10854 const_int_cost:
10855 if (mode == SImode)
10857 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10858 INTVAL (x), NULL, NULL,
10859 0, 0));
10860 /* Extra costs? */
10862 else
10864 *cost += COSTS_N_INSNS (arm_gen_constant
10865 (outer_code, SImode, NULL,
10866 trunc_int_for_mode (INTVAL (x), SImode),
10867 NULL, NULL, 0, 0)
10868 + arm_gen_constant (outer_code, SImode, NULL,
10869 INTVAL (x) >> 32, NULL,
10870 NULL, 0, 0));
10871 /* Extra costs? */
10874 return true;
10876 case CONST:
10877 case LABEL_REF:
10878 case SYMBOL_REF:
10879 if (speed_p)
10881 if (arm_arch_thumb2 && !flag_pic)
10882 *cost = COSTS_N_INSNS (2);
10883 else
10884 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10886 else
10887 *cost = COSTS_N_INSNS (2);
10889 if (flag_pic)
10891 *cost += COSTS_N_INSNS (1);
10892 if (speed_p)
10893 *cost += extra_cost->alu.arith;
10896 return true;
10898 case CONST_FIXED:
10899 *cost = COSTS_N_INSNS (4);
10900 /* Fixme. */
10901 return true;
10903 case CONST_DOUBLE:
10904 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10905 && (mode == SFmode || !TARGET_VFP_SINGLE))
10907 if (vfp3_const_double_rtx (x))
10909 *cost = COSTS_N_INSNS (1);
10910 if (speed_p)
10911 *cost += extra_cost->fp[mode == DFmode].fpconst;
10912 return true;
10915 if (speed_p)
10917 *cost = COSTS_N_INSNS (1);
10918 if (mode == DFmode)
10919 *cost += extra_cost->ldst.loadd;
10920 else
10921 *cost += extra_cost->ldst.loadf;
10923 else
10924 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10926 return true;
10928 *cost = COSTS_N_INSNS (4);
10929 return true;
10931 case CONST_VECTOR:
10932 /* Fixme. */
10933 if (TARGET_NEON
10934 && TARGET_HARD_FLOAT
10935 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10936 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10937 *cost = COSTS_N_INSNS (1);
10938 else
10939 *cost = COSTS_N_INSNS (4);
10940 return true;
10942 case HIGH:
10943 case LO_SUM:
10944 *cost = COSTS_N_INSNS (1);
10945 /* When optimizing for size, we prefer constant pool entries to
10946 MOVW/MOVT pairs, so bump the cost of these slightly. */
10947 if (!speed_p)
10948 *cost += 1;
10949 return true;
10951 case CLZ:
10952 *cost = COSTS_N_INSNS (1);
10953 if (speed_p)
10954 *cost += extra_cost->alu.clz;
10955 return false;
10957 case SMIN:
10958 if (XEXP (x, 1) == const0_rtx)
10960 *cost = COSTS_N_INSNS (1);
10961 if (speed_p)
10962 *cost += extra_cost->alu.log_shift;
10963 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10964 return true;
10966 /* Fall through. */
10967 case SMAX:
10968 case UMIN:
10969 case UMAX:
10970 *cost = COSTS_N_INSNS (2);
10971 return false;
10973 case TRUNCATE:
10974 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10975 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10976 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10977 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10978 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10979 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10980 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10981 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10982 == ZERO_EXTEND))))
10984 *cost = COSTS_N_INSNS (1);
10985 if (speed_p)
10986 *cost += extra_cost->mult[1].extend;
10987 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10988 speed_p)
10989 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10990 0, speed_p));
10991 return true;
10993 *cost = LIBCALL_COST (1);
10994 return false;
10996 case UNSPEC:
10997 return arm_unspec_cost (x, outer_code, speed_p, cost);
10999 case PC:
11000 /* Reading the PC is like reading any other register. Writing it
11001 is more expensive, but we take that into account elsewhere. */
11002 *cost = 0;
11003 return true;
11005 case ZERO_EXTRACT:
11006 /* TODO: Simple zero_extract of bottom bits using AND. */
11007 /* Fall through. */
11008 case SIGN_EXTRACT:
11009 if (arm_arch6
11010 && mode == SImode
11011 && CONST_INT_P (XEXP (x, 1))
11012 && CONST_INT_P (XEXP (x, 2)))
11014 *cost = COSTS_N_INSNS (1);
11015 if (speed_p)
11016 *cost += extra_cost->alu.bfx;
11017 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11018 return true;
11020 /* Without UBFX/SBFX, need to resort to shift operations. */
11021 *cost = COSTS_N_INSNS (2);
11022 if (speed_p)
11023 *cost += 2 * extra_cost->alu.shift;
11024 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
11025 return true;
11027 case FLOAT_EXTEND:
11028 if (TARGET_HARD_FLOAT)
11030 *cost = COSTS_N_INSNS (1);
11031 if (speed_p)
11032 *cost += extra_cost->fp[mode == DFmode].widen;
11033 if (!TARGET_FPU_ARMV8
11034 && GET_MODE (XEXP (x, 0)) == HFmode)
11036 /* Pre v8, widening HF->DF is a two-step process, first
11037 widening to SFmode. */
11038 *cost += COSTS_N_INSNS (1);
11039 if (speed_p)
11040 *cost += extra_cost->fp[0].widen;
11042 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11043 return true;
11046 *cost = LIBCALL_COST (1);
11047 return false;
11049 case FLOAT_TRUNCATE:
11050 if (TARGET_HARD_FLOAT)
11052 *cost = COSTS_N_INSNS (1);
11053 if (speed_p)
11054 *cost += extra_cost->fp[mode == DFmode].narrow;
11055 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11056 return true;
11057 /* Vector modes? */
11059 *cost = LIBCALL_COST (1);
11060 return false;
11062 case FMA:
11063 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11065 rtx op0 = XEXP (x, 0);
11066 rtx op1 = XEXP (x, 1);
11067 rtx op2 = XEXP (x, 2);
11069 *cost = COSTS_N_INSNS (1);
11071 /* vfms or vfnma. */
11072 if (GET_CODE (op0) == NEG)
11073 op0 = XEXP (op0, 0);
11075 /* vfnms or vfnma. */
11076 if (GET_CODE (op2) == NEG)
11077 op2 = XEXP (op2, 0);
11079 *cost += rtx_cost (op0, FMA, 0, speed_p);
11080 *cost += rtx_cost (op1, FMA, 1, speed_p);
11081 *cost += rtx_cost (op2, FMA, 2, speed_p);
11083 if (speed_p)
11084 *cost += extra_cost->fp[mode == DFmode].fma;
11086 return true;
11089 *cost = LIBCALL_COST (3);
11090 return false;
11092 case FIX:
11093 case UNSIGNED_FIX:
11094 if (TARGET_HARD_FLOAT)
11096 if (GET_MODE_CLASS (mode) == MODE_INT)
11098 *cost = COSTS_N_INSNS (1);
11099 if (speed_p)
11100 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11101 /* Strip off the 'cost' of rounding towards zero. */
11102 if (GET_CODE (XEXP (x, 0)) == FIX)
11103 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11104 else
11105 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11106 /* ??? Increase the cost to deal with transferring from
11107 FP -> CORE registers? */
11108 return true;
11110 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11111 && TARGET_FPU_ARMV8)
11113 *cost = COSTS_N_INSNS (1);
11114 if (speed_p)
11115 *cost += extra_cost->fp[mode == DFmode].roundint;
11116 return false;
11118 /* Vector costs? */
11120 *cost = LIBCALL_COST (1);
11121 return false;
11123 case FLOAT:
11124 case UNSIGNED_FLOAT:
11125 if (TARGET_HARD_FLOAT)
11127 /* ??? Increase the cost to deal with transferring from CORE
11128 -> FP registers? */
11129 *cost = COSTS_N_INSNS (1);
11130 if (speed_p)
11131 *cost += extra_cost->fp[mode == DFmode].fromint;
11132 return false;
11134 *cost = LIBCALL_COST (1);
11135 return false;
11137 case CALL:
11138 *cost = COSTS_N_INSNS (1);
11139 return true;
11141 case ASM_OPERANDS:
11143 /* Just a guess. Guess number of instructions in the asm
11144 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11145 though (see PR60663). */
11146 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11147 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11149 *cost = COSTS_N_INSNS (asm_length + num_operands);
11150 return true;
11152 default:
11153 if (mode != VOIDmode)
11154 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11155 else
11156 *cost = COSTS_N_INSNS (4); /* Who knows? */
11157 return false;
11161 #undef HANDLE_NARROW_SHIFT_ARITH
11163 /* RTX costs when optimizing for size. */
11164 static bool
11165 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11166 int *total, bool speed)
11168 bool result;
11170 if (TARGET_OLD_RTX_COSTS
11171 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11173 /* Old way. (Deprecated.) */
11174 if (!speed)
11175 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11176 (enum rtx_code) outer_code, total);
11177 else
11178 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11179 (enum rtx_code) outer_code, total,
11180 speed);
11182 else
11184 /* New way. */
11185 if (current_tune->insn_extra_cost)
11186 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11187 (enum rtx_code) outer_code,
11188 current_tune->insn_extra_cost,
11189 total, speed);
11190 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11191 && current_tune->insn_extra_cost == NULL */
11192 else
11193 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11194 (enum rtx_code) outer_code,
11195 &generic_extra_costs, total, speed);
11198 if (dump_file && (dump_flags & TDF_DETAILS))
11200 print_rtl_single (dump_file, x);
11201 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11202 *total, result ? "final" : "partial");
11204 return result;
11207 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11208 supported on any "slowmul" cores, so it can be ignored. */
11210 static bool
11211 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11212 int *total, bool speed)
11214 machine_mode mode = GET_MODE (x);
11216 if (TARGET_THUMB)
11218 *total = thumb1_rtx_costs (x, code, outer_code);
11219 return true;
11222 switch (code)
11224 case MULT:
11225 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11226 || mode == DImode)
11228 *total = COSTS_N_INSNS (20);
11229 return false;
11232 if (CONST_INT_P (XEXP (x, 1)))
11234 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11235 & (unsigned HOST_WIDE_INT) 0xffffffff);
11236 int cost, const_ok = const_ok_for_arm (i);
11237 int j, booth_unit_size;
11239 /* Tune as appropriate. */
11240 cost = const_ok ? 4 : 8;
11241 booth_unit_size = 2;
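/* The loop below steps through the constant one Booth unit (two bits) at a
   time, adding one to the cost per step: e.g. a multiplier of 0xff takes
   four steps, so the total is COSTS_N_INSNS (4 + 4) == COSTS_N_INSNS (8)
   since 0xff is const_ok_for_arm.  */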
11242 for (j = 0; i && j < 32; j += booth_unit_size)
11244 i >>= booth_unit_size;
11245 cost++;
11248 *total = COSTS_N_INSNS (cost);
11249 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11250 return true;
11253 *total = COSTS_N_INSNS (20);
11254 return false;
11256 default:
11257 return arm_rtx_costs_1 (x, outer_code, total, speed);
11262 /* RTX cost for cores with a fast multiply unit (M variants). */
11264 static bool
11265 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11266 int *total, bool speed)
11268 machine_mode mode = GET_MODE (x);
11270 if (TARGET_THUMB1)
11272 *total = thumb1_rtx_costs (x, code, outer_code);
11273 return true;
11276 /* ??? should thumb2 use different costs? */
11277 switch (code)
11279 case MULT:
11280 /* There is no point basing this on the tuning, since it is always the
11281 fast variant if it exists at all. */
11282 if (mode == DImode
11283 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11284 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11285 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11287 *total = COSTS_N_INSNS (2);
11288 return false;
11292 if (mode == DImode)
11294 *total = COSTS_N_INSNS (5);
11295 return false;
11298 if (CONST_INT_P (XEXP (x, 1)))
11300 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11301 & (unsigned HOST_WIDE_INT) 0xffffffff);
11302 int cost, const_ok = const_ok_for_arm (i);
11303 int j, booth_unit_size;
11305 /* Tune as appropriate. */
11306 cost = const_ok ? 4 : 8;
11307 booth_unit_size = 8;
11308 for (j = 0; i && j < 32; j += booth_unit_size)
11310 i >>= booth_unit_size;
11311 cost++;
11314 *total = COSTS_N_INSNS (cost);
11315 return false;
11318 if (mode == SImode)
11320 *total = COSTS_N_INSNS (4);
11321 return false;
11324 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11326 if (TARGET_HARD_FLOAT
11327 && (mode == SFmode
11328 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11330 *total = COSTS_N_INSNS (1);
11331 return false;
11335 /* Requires a lib call */
11336 *total = COSTS_N_INSNS (20);
11337 return false;
11339 default:
11340 return arm_rtx_costs_1 (x, outer_code, total, speed);
11345 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11346 so it can be ignored. */
11348 static bool
11349 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11350 int *total, bool speed)
11352 machine_mode mode = GET_MODE (x);
11354 if (TARGET_THUMB)
11356 *total = thumb1_rtx_costs (x, code, outer_code);
11357 return true;
11360 switch (code)
11362 case COMPARE:
11363 if (GET_CODE (XEXP (x, 0)) != MULT)
11364 return arm_rtx_costs_1 (x, outer_code, total, speed);
11366 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11367 will stall until the multiplication is complete. */
11368 *total = COSTS_N_INSNS (3);
11369 return false;
11371 case MULT:
11372 /* There is no point basing this on the tuning, since it is always the
11373 fast variant if it exists at all. */
11374 if (mode == DImode
11375 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11376 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11377 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11379 *total = COSTS_N_INSNS (2);
11380 return false;
11384 if (mode == DImode)
11386 *total = COSTS_N_INSNS (5);
11387 return false;
11390 if (CONST_INT_P (XEXP (x, 1)))
11392 /* If operand 1 is a constant we can more accurately
11393 calculate the cost of the multiply. The multiplier can
11394 retire 15 bits on the first cycle and a further 12 on the
11395 second. We do, of course, have to load the constant into
11396 a register first. */
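/* E.g. INTVAL 0x12345 has bits set above bit 14 but none above bit 26, so
   the mask tests below give 1 (overhead) + 1 == 2 cycles, whereas a small
   constant such as 0xff costs only the single cycle of overhead.  */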
11397 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11398 /* There's a general overhead of one cycle. */
11399 int cost = 1;
11400 unsigned HOST_WIDE_INT masked_const;
11402 if (i & 0x80000000)
11403 i = ~i;
11405 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11407 masked_const = i & 0xffff8000;
11408 if (masked_const != 0)
11410 cost++;
11411 masked_const = i & 0xf8000000;
11412 if (masked_const != 0)
11413 cost++;
11415 *total = COSTS_N_INSNS (cost);
11416 return false;
11419 if (mode == SImode)
11421 *total = COSTS_N_INSNS (3);
11422 return false;
11425 /* Requires a lib call */
11426 *total = COSTS_N_INSNS (20);
11427 return false;
11429 default:
11430 return arm_rtx_costs_1 (x, outer_code, total, speed);
11435 /* RTX costs for 9e (and later) cores. */
11437 static bool
11438 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11439 int *total, bool speed)
11441 machine_mode mode = GET_MODE (x);
11443 if (TARGET_THUMB1)
11445 switch (code)
11447 case MULT:
11448 /* Small multiply: 32 cycles for an integer multiply inst. */
11449 if (arm_arch6m && arm_m_profile_small_mul)
11450 *total = COSTS_N_INSNS (32);
11451 else
11452 *total = COSTS_N_INSNS (3);
11453 return true;
11455 default:
11456 *total = thumb1_rtx_costs (x, code, outer_code);
11457 return true;
11461 switch (code)
11463 case MULT:
11464 /* There is no point basing this on the tuning, since it is always the
11465 fast variant if it exists at all. */
11466 if (mode == DImode
11467 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11468 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11469 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11471 *total = COSTS_N_INSNS (2);
11472 return false;
11476 if (mode == DImode)
11478 *total = COSTS_N_INSNS (5);
11479 return false;
11482 if (mode == SImode)
11484 *total = COSTS_N_INSNS (2);
11485 return false;
11488 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11490 if (TARGET_HARD_FLOAT
11491 && (mode == SFmode
11492 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11494 *total = COSTS_N_INSNS (1);
11495 return false;
11499 *total = COSTS_N_INSNS (20);
11500 return false;
11502 default:
11503 return arm_rtx_costs_1 (x, outer_code, total, speed);
11506 /* All address computations that can be done are free, but rtx cost returns
11507 the same for practically all of them. So we weight the different types
11508 of address here in the order (most pref first):
11509 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11510 static inline int
11511 arm_arm_address_cost (rtx x)
11513 enum rtx_code c = GET_CODE (x);
11515 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11516 return 0;
11517 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11518 return 10;
11520 if (c == PLUS)
11522 if (CONST_INT_P (XEXP (x, 1)))
11523 return 2;
11525 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11526 return 3;
11528 return 4;
11531 return 6;
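/* So, for instance, (post_inc (reg)) scores 0 (most preferred),
   (plus (reg) (const_int 4)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a bare (reg) scores 6
   and a (symbol_ref) scores 10 (least preferred).  */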
11534 static inline int
11535 arm_thumb_address_cost (rtx x)
11537 enum rtx_code c = GET_CODE (x);
11539 if (c == REG)
11540 return 1;
11541 if (c == PLUS
11542 && REG_P (XEXP (x, 0))
11543 && CONST_INT_P (XEXP (x, 1)))
11544 return 1;
11546 return 2;
11549 static int
11550 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11551 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11553 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11556 /* Adjust cost hook for XScale. */
11557 static bool
11558 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11560 /* Some true dependencies can have a higher cost depending
11561 on precisely how certain input operands are used. */
11562 if (REG_NOTE_KIND(link) == 0
11563 && recog_memoized (insn) >= 0
11564 && recog_memoized (dep) >= 0)
11566 int shift_opnum = get_attr_shift (insn);
11567 enum attr_type attr_type = get_attr_type (dep);
11569 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11570 operand for INSN. If we have a shifted input operand and the
11571 instruction we depend on is another ALU instruction, then we may
11572 have to account for an additional stall. */
11573 if (shift_opnum != 0
11574 && (attr_type == TYPE_ALU_SHIFT_IMM
11575 || attr_type == TYPE_ALUS_SHIFT_IMM
11576 || attr_type == TYPE_LOGIC_SHIFT_IMM
11577 || attr_type == TYPE_LOGICS_SHIFT_IMM
11578 || attr_type == TYPE_ALU_SHIFT_REG
11579 || attr_type == TYPE_ALUS_SHIFT_REG
11580 || attr_type == TYPE_LOGIC_SHIFT_REG
11581 || attr_type == TYPE_LOGICS_SHIFT_REG
11582 || attr_type == TYPE_MOV_SHIFT
11583 || attr_type == TYPE_MVN_SHIFT
11584 || attr_type == TYPE_MOV_SHIFT_REG
11585 || attr_type == TYPE_MVN_SHIFT_REG))
11587 rtx shifted_operand;
11588 int opno;
11590 /* Get the shifted operand. */
11591 extract_insn (insn);
11592 shifted_operand = recog_data.operand[shift_opnum];
11594 /* Iterate over all the operands in DEP. If we write an operand
11595 that overlaps with SHIFTED_OPERAND, then we have to increase the
11596 cost of this dependency. */
11597 extract_insn (dep);
11598 preprocess_constraints (dep);
11599 for (opno = 0; opno < recog_data.n_operands; opno++)
11601 /* We can ignore strict inputs. */
11602 if (recog_data.operand_type[opno] == OP_IN)
11603 continue;
11605 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11606 shifted_operand))
11608 *cost = 2;
11609 return false;
11614 return true;
11617 /* Adjust cost hook for Cortex A9. */
11618 static bool
11619 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11621 switch (REG_NOTE_KIND (link))
11623 case REG_DEP_ANTI:
11624 *cost = 0;
11625 return false;
11627 case REG_DEP_TRUE:
11628 case REG_DEP_OUTPUT:
11629 if (recog_memoized (insn) >= 0
11630 && recog_memoized (dep) >= 0)
11632 if (GET_CODE (PATTERN (insn)) == SET)
11634 if (GET_MODE_CLASS
11635 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11636 || GET_MODE_CLASS
11637 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11639 enum attr_type attr_type_insn = get_attr_type (insn);
11640 enum attr_type attr_type_dep = get_attr_type (dep);
11642 /* By default all dependencies of the form
11643 s0 = s0 <op> s1
11644 s0 = s0 <op> s2
11645 have an extra latency of 1 cycle because
11646 of the input and output dependency in this
11647 case. However this gets modeled as a true
11648 dependency and hence all these checks. */
11649 if (REG_P (SET_DEST (PATTERN (insn)))
11650 && REG_P (SET_DEST (PATTERN (dep)))
11651 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11652 SET_DEST (PATTERN (dep))))
11654 /* FMACS is a special case where the dependent
11655 instruction can be issued 3 cycles before
11656 the normal latency in case of an output
11657 dependency. */
11658 if ((attr_type_insn == TYPE_FMACS
11659 || attr_type_insn == TYPE_FMACD)
11660 && (attr_type_dep == TYPE_FMACS
11661 || attr_type_dep == TYPE_FMACD))
11663 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11664 *cost = insn_default_latency (dep) - 3;
11665 else
11666 *cost = insn_default_latency (dep);
11667 return false;
11669 else
11671 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11672 *cost = insn_default_latency (dep) + 1;
11673 else
11674 *cost = insn_default_latency (dep);
11676 return false;
11681 break;
11683 default:
11684 gcc_unreachable ();
11687 return true;
11690 /* Adjust cost hook for FA726TE. */
11691 static bool
11692 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11694 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11695 has a penalty of 3. */
11696 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11697 && recog_memoized (insn) >= 0
11698 && recog_memoized (dep) >= 0
11699 && get_attr_conds (dep) == CONDS_SET)
11701 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11702 if (get_attr_conds (insn) == CONDS_USE
11703 && get_attr_type (insn) != TYPE_BRANCH)
11705 *cost = 3;
11706 return false;
11709 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11710 || get_attr_conds (insn) == CONDS_USE)
11712 *cost = 0;
11713 return false;
11717 return true;
11720 /* Implement TARGET_REGISTER_MOVE_COST.
11722 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11723 such a move is typically more expensive than a single memory access. We set
11724 the cost to less than two memory accesses so that floating
11725 point to integer conversion does not go through memory. */
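/* For instance, with the TARGET_32BIT costs below a VFP<->core transfer
   costs 15 while arm_memory_move_cost returns 10, so a store-plus-load
   round trip through memory (roughly 20) is never cheaper than the
   direct register move.  */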
11728 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11729 reg_class_t from, reg_class_t to)
11731 if (TARGET_32BIT)
11733 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11734 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11735 return 15;
11736 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11737 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11738 return 4;
11739 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11740 return 20;
11741 else
11742 return 2;
11744 else
11746 if (from == HI_REGS || to == HI_REGS)
11747 return 4;
11748 else
11749 return 2;
11753 /* Implement TARGET_MEMORY_MOVE_COST. */
11756 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11757 bool in ATTRIBUTE_UNUSED)
11759 if (TARGET_32BIT)
11760 return 10;
11761 else
11763 if (GET_MODE_SIZE (mode) < 4)
11764 return 8;
11765 else
11766 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11770 /* Vectorizer cost model implementation. */
11772 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11773 static int
11774 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11775 tree vectype,
11776 int misalign ATTRIBUTE_UNUSED)
11778 unsigned elements;
11780 switch (type_of_cost)
11782 case scalar_stmt:
11783 return current_tune->vec_costs->scalar_stmt_cost;
11785 case scalar_load:
11786 return current_tune->vec_costs->scalar_load_cost;
11788 case scalar_store:
11789 return current_tune->vec_costs->scalar_store_cost;
11791 case vector_stmt:
11792 return current_tune->vec_costs->vec_stmt_cost;
11794 case vector_load:
11795 return current_tune->vec_costs->vec_align_load_cost;
11797 case vector_store:
11798 return current_tune->vec_costs->vec_store_cost;
11800 case vec_to_scalar:
11801 return current_tune->vec_costs->vec_to_scalar_cost;
11803 case scalar_to_vec:
11804 return current_tune->vec_costs->scalar_to_vec_cost;
11806 case unaligned_load:
11807 return current_tune->vec_costs->vec_unalign_load_cost;
11809 case unaligned_store:
11810 return current_tune->vec_costs->vec_unalign_store_cost;
11812 case cond_branch_taken:
11813 return current_tune->vec_costs->cond_taken_branch_cost;
11815 case cond_branch_not_taken:
11816 return current_tune->vec_costs->cond_not_taken_branch_cost;
11818 case vec_perm:
11819 case vec_promote_demote:
11820 return current_tune->vec_costs->vec_stmt_cost;
11822 case vec_construct:
11823 elements = TYPE_VECTOR_SUBPARTS (vectype);
11824 return elements / 2 + 1;
11826 default:
11827 gcc_unreachable ();
11831 /* Implement targetm.vectorize.add_stmt_cost. */
11833 static unsigned
11834 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11835 struct _stmt_vec_info *stmt_info, int misalign,
11836 enum vect_cost_model_location where)
11838 unsigned *cost = (unsigned *) data;
11839 unsigned retval = 0;
11841 if (flag_vect_cost_model)
11843 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11844 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11846 /* Statements in an inner loop relative to the loop being
11847 vectorized are weighted more heavily. The value here is
11848 arbitrary and could potentially be improved with analysis. */
11849 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11850 count *= 50; /* FIXME. */
11852 retval = (unsigned) (count * stmt_cost);
11853 cost[where] += retval;
11856 return retval;
11859 /* Return true if and only if this insn can dual-issue only as older. */
11860 static bool
11861 cortexa7_older_only (rtx_insn *insn)
11863 if (recog_memoized (insn) < 0)
11864 return false;
11866 switch (get_attr_type (insn))
11868 case TYPE_ALU_DSP_REG:
11869 case TYPE_ALU_SREG:
11870 case TYPE_ALUS_SREG:
11871 case TYPE_LOGIC_REG:
11872 case TYPE_LOGICS_REG:
11873 case TYPE_ADC_REG:
11874 case TYPE_ADCS_REG:
11875 case TYPE_ADR:
11876 case TYPE_BFM:
11877 case TYPE_REV:
11878 case TYPE_MVN_REG:
11879 case TYPE_SHIFT_IMM:
11880 case TYPE_SHIFT_REG:
11881 case TYPE_LOAD_BYTE:
11882 case TYPE_LOAD1:
11883 case TYPE_STORE1:
11884 case TYPE_FFARITHS:
11885 case TYPE_FADDS:
11886 case TYPE_FFARITHD:
11887 case TYPE_FADDD:
11888 case TYPE_FMOV:
11889 case TYPE_F_CVT:
11890 case TYPE_FCMPS:
11891 case TYPE_FCMPD:
11892 case TYPE_FCONSTS:
11893 case TYPE_FCONSTD:
11894 case TYPE_FMULS:
11895 case TYPE_FMACS:
11896 case TYPE_FMULD:
11897 case TYPE_FMACD:
11898 case TYPE_FDIVS:
11899 case TYPE_FDIVD:
11900 case TYPE_F_MRC:
11901 case TYPE_F_MRRC:
11902 case TYPE_F_FLAG:
11903 case TYPE_F_LOADS:
11904 case TYPE_F_STORES:
11905 return true;
11906 default:
11907 return false;
11911 /* Return true if and only if this insn can dual-issue as younger. */
11912 static bool
11913 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11915 if (recog_memoized (insn) < 0)
11917 if (verbose > 5)
11918 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11919 return false;
11922 switch (get_attr_type (insn))
11924 case TYPE_ALU_IMM:
11925 case TYPE_ALUS_IMM:
11926 case TYPE_LOGIC_IMM:
11927 case TYPE_LOGICS_IMM:
11928 case TYPE_EXTEND:
11929 case TYPE_MVN_IMM:
11930 case TYPE_MOV_IMM:
11931 case TYPE_MOV_REG:
11932 case TYPE_MOV_SHIFT:
11933 case TYPE_MOV_SHIFT_REG:
11934 case TYPE_BRANCH:
11935 case TYPE_CALL:
11936 return true;
11937 default:
11938 return false;
11943 /* Look for an instruction that can dual issue only as an older
11944 instruction, and move it in front of any instructions that can
11945 dual-issue as younger, while preserving the relative order of all
11946 other instructions in the ready list. This is a heuristic to help
11947 dual-issue in later cycles, by postponing issue of more flexible
11948 instructions. This heuristic may affect dual issue opportunities
11949 in the current cycle. */
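/* As an illustration (the mnemonics are just examples of the two
   classes): if the ready list holds an "add r0, r1, #1" (younger) at
   the head followed by a "vadd.f32 s0, s0, s1" (older-only), the code
   below moves the vadd in front of the add, so the more flexible add
   is still available to pair as the younger half of a later cycle.  */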
11950 static void
11951 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11952 int *n_readyp, int clock)
11954 int i;
11955 int first_older_only = -1, first_younger = -1;
11957 if (verbose > 5)
11958 fprintf (file,
11959 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11960 clock,
11961 *n_readyp);
11963 /* Traverse the ready list from the head (the instruction to issue
11964 first), looking for the first instruction that can issue as
11965 younger and the first instruction that can dual-issue only as
11966 older. */
11967 for (i = *n_readyp - 1; i >= 0; i--)
11969 rtx_insn *insn = ready[i];
11970 if (cortexa7_older_only (insn))
11972 first_older_only = i;
11973 if (verbose > 5)
11974 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11975 break;
11977 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11978 first_younger = i;
11981 /* Nothing to reorder because either no younger insn was found or an insn
11982 that can dual-issue only as older appears before any insn that
11983 can dual-issue as younger. */
11984 if (first_younger == -1)
11986 if (verbose > 5)
11987 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11988 return;
11991 /* Nothing to reorder because no older-only insn in the ready list. */
11992 if (first_older_only == -1)
11994 if (verbose > 5)
11995 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11996 return;
11999 /* Move first_older_only insn before first_younger. */
12000 if (verbose > 5)
12001 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12002 INSN_UID(ready [first_older_only]),
12003 INSN_UID(ready [first_younger]));
12004 rtx_insn *first_older_only_insn = ready [first_older_only];
12005 for (i = first_older_only; i < first_younger; i++)
12007 ready[i] = ready[i+1];
12010 ready[i] = first_older_only_insn;
12011 return;
12014 /* Implement TARGET_SCHED_REORDER. */
12015 static int
12016 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12017 int clock)
12019 switch (arm_tune)
12021 case cortexa7:
12022 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12023 break;
12024 default:
12025 /* Do nothing for other cores. */
12026 break;
12029 return arm_issue_rate ();
12032 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
12033 It corrects the value of COST based on the relationship between
12034 INSN and DEP through the dependence LINK. It returns the new
12035 value. There is a per-core adjust_cost hook to adjust scheduler costs
12036 and the per-core hook can choose to completely override the generic
12037 adjust_cost function. Only put bits of code into arm_adjust_cost that
12038 are common across all cores. */
12039 static int
12040 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12042 rtx i_pat, d_pat;
12044 /* When generating Thumb-1 code, we want to place flag-setting operations
12045 close to a conditional branch which depends on them, so that we can
12046 omit the comparison. */
12047 if (TARGET_THUMB1
12048 && REG_NOTE_KIND (link) == 0
12049 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12050 && recog_memoized (dep) >= 0
12051 && get_attr_conds (dep) == CONDS_SET)
12052 return 0;
12054 if (current_tune->sched_adjust_cost != NULL)
12056 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12057 return cost;
12060 /* XXX Is this strictly true? */
12061 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12062 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12063 return 0;
12065 /* Call insns don't incur a stall, even if they follow a load. */
12066 if (REG_NOTE_KIND (link) == 0
12067 && CALL_P (insn))
12068 return 1;
12070 if ((i_pat = single_set (insn)) != NULL
12071 && MEM_P (SET_SRC (i_pat))
12072 && (d_pat = single_set (dep)) != NULL
12073 && MEM_P (SET_DEST (d_pat)))
12075 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12076 /* This is a load after a store; there is no conflict if the load reads
12077 from a cached area. Assume that loads from the stack, and from the
12078 constant pool are cached, and that others will miss. This is a
12079 hack. */
12081 if ((GET_CODE (src_mem) == SYMBOL_REF
12082 && CONSTANT_POOL_ADDRESS_P (src_mem))
12083 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12084 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12085 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12086 return 1;
12089 return cost;
12093 arm_max_conditional_execute (void)
12095 return max_insns_skipped;
12098 static int
12099 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12101 if (TARGET_32BIT)
12102 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12103 else
12104 return (optimize > 0) ? 2 : 0;
12107 static int
12108 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12110 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12113 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12114 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12115 sequences of non-executed instructions in IT blocks probably take the same
12116 amount of time as executed instructions (and the IT instruction itself takes
12117 space in icache). This function was experimentally determined to give good
12118 results on a popular embedded benchmark. */
12120 static int
12121 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12123 return (TARGET_32BIT && speed_p) ? 1
12124 : arm_default_branch_cost (speed_p, predictable_p);
12127 static int
12128 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12130 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12133 static bool fp_consts_inited = false;
12135 static REAL_VALUE_TYPE value_fp0;
12137 static void
12138 init_fp_table (void)
12140 REAL_VALUE_TYPE r;
12142 r = REAL_VALUE_ATOF ("0", DFmode);
12143 value_fp0 = r;
12144 fp_consts_inited = true;
12147 /* Return TRUE if rtx X is a valid immediate FP constant. */
12149 arm_const_double_rtx (rtx x)
12151 REAL_VALUE_TYPE r;
12153 if (!fp_consts_inited)
12154 init_fp_table ();
12156 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12157 if (REAL_VALUE_MINUS_ZERO (r))
12158 return 0;
12160 if (REAL_VALUES_EQUAL (r, value_fp0))
12161 return 1;
12163 return 0;
12166 /* VFPv3 has a fairly wide range of representable immediates, formed from
12167 "quarter-precision" floating-point values. These can be evaluated using this
12168 formula (with ^ for exponentiation):
12170 -1^s * n * 2^-r
12172 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12173 16 <= n <= 31 and 0 <= r <= 7.
12175 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12177 - A (most-significant) is the sign bit.
12178 - BCD are the exponent (encoded as r XOR 3).
12179 - EFGH are the mantissa (encoded as n - 16).
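/* Worked example (for illustration): 1.0 = 16 * 2^-4, so s = 0, n = 16
   and r = 4.  That encodes as A = 0, BCD = r XOR 3 = 0b111 and
   EFGH = n - 16 = 0b0000, i.e. the index 0x70 that
   vfp3_const_double_index below returns for a CONST_DOUBLE of 1.0.  */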
12182 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12183 fconst[sd] instruction, or -1 if X isn't suitable. */
12184 static int
12185 vfp3_const_double_index (rtx x)
12187 REAL_VALUE_TYPE r, m;
12188 int sign, exponent;
12189 unsigned HOST_WIDE_INT mantissa, mant_hi;
12190 unsigned HOST_WIDE_INT mask;
12191 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12192 bool fail;
12194 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12195 return -1;
12197 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12199 /* We can't represent these things, so detect them first. */
12200 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12201 return -1;
12203 /* Extract sign, exponent and mantissa. */
12204 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12205 r = real_value_abs (&r);
12206 exponent = REAL_EXP (&r);
12207 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12208 highest (sign) bit, with a fixed binary point at bit point_pos.
12209 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12210 bits for the mantissa, this may fail (low bits would be lost). */
12211 real_ldexp (&m, &r, point_pos - exponent);
12212 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12213 mantissa = w.elt (0);
12214 mant_hi = w.elt (1);
12216 /* If there are bits set in the low part of the mantissa, we can't
12217 represent this value. */
12218 if (mantissa != 0)
12219 return -1;
12221 /* Now make it so that mantissa contains the most-significant bits, and move
12222 the point_pos to indicate that the least-significant bits have been
12223 discarded. */
12224 point_pos -= HOST_BITS_PER_WIDE_INT;
12225 mantissa = mant_hi;
12227 /* We can permit four significant bits of mantissa only, plus a high bit
12228 which is always 1. */
12229 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12230 if ((mantissa & mask) != 0)
12231 return -1;
12233 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12234 mantissa >>= point_pos - 5;
12236 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12237 floating-point immediate zero with Neon using an integer-zero load, but
12238 that case is handled elsewhere.) */
12239 if (mantissa == 0)
12240 return -1;
12242 gcc_assert (mantissa >= 16 && mantissa <= 31);
12244 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12245 normalized significands are in the range [1, 2)). (Our mantissa is shifted
12246 left 4 places at this point relative to normalized IEEE754 values). GCC
12247 internally uses [0.5, 1) (see real.c), so the exponent returned from
12248 REAL_EXP must be altered. */
12249 exponent = 5 - exponent;
12251 if (exponent < 0 || exponent > 7)
12252 return -1;
12254 /* Sign, mantissa and exponent are now in the correct form to plug into the
12255 formula described in the comment above. */
12256 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12259 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12261 vfp3_const_double_rtx (rtx x)
12263 if (!TARGET_VFP3)
12264 return 0;
12266 return vfp3_const_double_index (x) != -1;
12269 /* Recognize immediates which can be used in various Neon instructions. Legal
12270 immediates are described by the following table (for VMVN variants, the
12271 bitwise inverse of the constant shown is recognized. In either case, VMOV
12272 is output and the correct instruction to use for a given constant is chosen
12273 by the assembler). The constant shown is replicated across all elements of
12274 the destination vector.
12276 insn elems variant constant (binary)
12277 ---- ----- ------- -----------------
12278 vmov i32 0 00000000 00000000 00000000 abcdefgh
12279 vmov i32 1 00000000 00000000 abcdefgh 00000000
12280 vmov i32 2 00000000 abcdefgh 00000000 00000000
12281 vmov i32 3 abcdefgh 00000000 00000000 00000000
12282 vmov i16 4 00000000 abcdefgh
12283 vmov i16 5 abcdefgh 00000000
12284 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12285 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12286 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12287 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12288 vmvn i16 10 00000000 abcdefgh
12289 vmvn i16 11 abcdefgh 00000000
12290 vmov i32 12 00000000 00000000 abcdefgh 11111111
12291 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12292 vmov i32 14 00000000 abcdefgh 11111111 11111111
12293 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12294 vmov i8 16 abcdefgh
12295 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12296 eeeeeeee ffffffff gggggggg hhhhhhhh
12297 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12298 vmov f32 19 00000000 00000000 00000000 00000000
12300 For case 18, B = !b. Representable values are exactly those accepted by
12301 vfp3_const_double_index, but are output as floating-point numbers rather
12302 than indices.
12304 For case 19, we will change it to vmov.i32 when assembling.
12306 Variants 0-5 (inclusive) may also be used as immediates for the second
12307 operand of VORR/VBIC instructions.
12309 The INVERSE argument causes the bitwise inverse of the given operand to be
12310 recognized instead (used for recognizing legal immediates for the VAND/VORN
12311 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12312 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12313 output, rather than the real insns vbic/vorr).
12315 INVERSE makes no difference to the recognition of float vectors.
12317 The return value is the variant of immediate as shown in the above table, or
12318 -1 if the given value doesn't match any of the listed patterns.
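/* For example (with illustrative values): a V4SImode vector whose
   elements are all 0x2a00 matches variant 1 above with abcdefgh = 0x2a;
   the function returns 1, sets *ELEMENTWIDTH to 32 and *MODCONST to
   (const_int 0x2a00), and per the table the constant is output as a
   "vmov.i32" with that immediate.  */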
12320 static int
12321 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12322 rtx *modconst, int *elementwidth)
12324 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12325 matches = 1; \
12326 for (i = 0; i < idx; i += (STRIDE)) \
12327 if (!(TEST)) \
12328 matches = 0; \
12329 if (matches) \
12331 immtype = (CLASS); \
12332 elsize = (ELSIZE); \
12333 break; \
12336 unsigned int i, elsize = 0, idx = 0, n_elts;
12337 unsigned int innersize;
12338 unsigned char bytes[16];
12339 int immtype = -1, matches;
12340 unsigned int invmask = inverse ? 0xff : 0;
12341 bool vector = GET_CODE (op) == CONST_VECTOR;
12343 if (vector)
12345 n_elts = CONST_VECTOR_NUNITS (op);
12346 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12348 else
12350 n_elts = 1;
12351 if (mode == VOIDmode)
12352 mode = DImode;
12353 innersize = GET_MODE_SIZE (mode);
12356 /* Vectors of float constants. */
12357 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12359 rtx el0 = CONST_VECTOR_ELT (op, 0);
12360 REAL_VALUE_TYPE r0;
12362 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12363 return -1;
12365 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12367 for (i = 1; i < n_elts; i++)
12369 rtx elt = CONST_VECTOR_ELT (op, i);
12370 REAL_VALUE_TYPE re;
12372 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12374 if (!REAL_VALUES_EQUAL (r0, re))
12375 return -1;
12378 if (modconst)
12379 *modconst = CONST_VECTOR_ELT (op, 0);
12381 if (elementwidth)
12382 *elementwidth = 0;
12384 if (el0 == CONST0_RTX (GET_MODE (el0)))
12385 return 19;
12386 else
12387 return 18;
12390 /* Splat vector constant out into a byte vector. */
12391 for (i = 0; i < n_elts; i++)
12393 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12394 unsigned HOST_WIDE_INT elpart;
12395 unsigned int part, parts;
12397 if (CONST_INT_P (el))
12399 elpart = INTVAL (el);
12400 parts = 1;
12402 else if (CONST_DOUBLE_P (el))
12404 elpart = CONST_DOUBLE_LOW (el);
12405 parts = 2;
12407 else
12408 gcc_unreachable ();
12410 for (part = 0; part < parts; part++)
12412 unsigned int byte;
12413 for (byte = 0; byte < innersize; byte++)
12415 bytes[idx++] = (elpart & 0xff) ^ invmask;
12416 elpart >>= BITS_PER_UNIT;
12418 if (CONST_DOUBLE_P (el))
12419 elpart = CONST_DOUBLE_HIGH (el);
12423 /* Sanity check. */
12424 gcc_assert (idx == GET_MODE_SIZE (mode));
12428 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12429 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12431 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12432 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12434 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12435 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12437 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12438 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12440 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12442 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12444 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12445 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12447 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12448 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12450 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12451 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12453 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12454 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12456 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12458 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12460 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12461 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12463 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12464 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12466 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12467 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12469 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12470 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12472 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12474 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12475 && bytes[i] == bytes[(i + 8) % idx]);
12477 while (0);
12479 if (immtype == -1)
12480 return -1;
12482 if (elementwidth)
12483 *elementwidth = elsize;
12485 if (modconst)
12487 unsigned HOST_WIDE_INT imm = 0;
12489 /* Un-invert bytes of recognized vector, if necessary. */
12490 if (invmask != 0)
12491 for (i = 0; i < idx; i++)
12492 bytes[i] ^= invmask;
12494 if (immtype == 17)
12496 /* FIXME: Broken on 32-bit H_W_I hosts. */
12497 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12499 for (i = 0; i < 8; i++)
12500 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12501 << (i * BITS_PER_UNIT);
12503 *modconst = GEN_INT (imm);
12505 else
12507 unsigned HOST_WIDE_INT imm = 0;
12509 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12510 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12512 *modconst = GEN_INT (imm);
12516 return immtype;
12517 #undef CHECK
12520 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12521 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12522 float elements), and a modified constant (whatever should be output for a
12523 VMOV) in *MODCONST. */
12526 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12527 rtx *modconst, int *elementwidth)
12529 rtx tmpconst;
12530 int tmpwidth;
12531 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12533 if (retval == -1)
12534 return 0;
12536 if (modconst)
12537 *modconst = tmpconst;
12539 if (elementwidth)
12540 *elementwidth = tmpwidth;
12542 return 1;
12545 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12546 the immediate is valid, write a constant suitable for using as an operand
12547 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12548 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12551 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12552 rtx *modconst, int *elementwidth)
12554 rtx tmpconst;
12555 int tmpwidth;
12556 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12558 if (retval < 0 || retval > 5)
12559 return 0;
12561 if (modconst)
12562 *modconst = tmpconst;
12564 if (elementwidth)
12565 *elementwidth = tmpwidth;
12567 return 1;
12570 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12571 the immediate is valid, write a constant suitable for using as an operand
12572 to VSHR/VSHL to *MODCONST and the corresponding element width to
12573 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12574 which have different limits on the immediate value.
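/* For example (illustrative): a V8QImode vector of all 3s is a valid
   immediate for both forms (left shifts accept 0..7 for 8-bit
   elements, right shifts accept 1..8), whereas a vector of all 8s is
   valid only for the right-shift form.  */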
12577 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12578 rtx *modconst, int *elementwidth,
12579 bool isleftshift)
12581 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12582 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12583 unsigned HOST_WIDE_INT last_elt = 0;
12584 unsigned HOST_WIDE_INT maxshift;
12586 /* Split vector constant out into a byte vector. */
12587 for (i = 0; i < n_elts; i++)
12589 rtx el = CONST_VECTOR_ELT (op, i);
12590 unsigned HOST_WIDE_INT elpart;
12592 if (CONST_INT_P (el))
12593 elpart = INTVAL (el);
12594 else if (CONST_DOUBLE_P (el))
12595 return 0;
12596 else
12597 gcc_unreachable ();
12599 if (i != 0 && elpart != last_elt)
12600 return 0;
12602 last_elt = elpart;
12605 /* Shift less than element size. */
12606 maxshift = innersize * 8;
12608 if (isleftshift)
12610 /* Left shift immediate value can be from 0 to <size>-1. */
12611 if (last_elt >= maxshift)
12612 return 0;
12614 else
12616 /* Right shift immediate value can be from 1 to <size>. */
12617 if (last_elt == 0 || last_elt > maxshift)
12618 return 0;
12621 if (elementwidth)
12622 *elementwidth = innersize * 8;
12624 if (modconst)
12625 *modconst = CONST_VECTOR_ELT (op, 0);
12627 return 1;
12630 /* Return a string suitable for output of Neon immediate logic operation
12631 MNEM. */
12633 char *
12634 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12635 int inverse, int quad)
12637 int width, is_valid;
12638 static char templ[40];
12640 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12642 gcc_assert (is_valid != 0);
12644 if (quad)
12645 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12646 else
12647 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12649 return templ;
12652 /* Return a string suitable for output of Neon immediate shift operation
12653 (VSHR or VSHL) MNEM. */
12655 char *
12656 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12657 machine_mode mode, int quad,
12658 bool isleftshift)
12660 int width, is_valid;
12661 static char templ[40];
12663 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12664 gcc_assert (is_valid != 0);
12666 if (quad)
12667 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12668 else
12669 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12671 return templ;
12674 /* Output a sequence of pairwise operations to implement a reduction.
12675 NOTE: We do "too much work" here, because pairwise operations work on two
12676 registers-worth of operands in one go. Unfortunately we can't exploit those
12677 extra calculations to do the full operation in fewer steps, I don't think.
12678 Although all vector elements of the result but the first are ignored, we
12679 actually calculate the same result in each of the elements. An alternative
12680 such as initially loading a vector with zero to use as each of the second
12681 operands would use up an additional register and take an extra instruction,
12682 for no particular gain. */
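/* For example (illustrative): reducing a V4HImode vector {a, b, c, d}
   with a vpadd-style REDUC takes two steps here: adding the operand to
   itself pairwise first gives {a+b, c+d, a+b, c+d}, and the second
   step leaves the full sum a+b+c+d in element 0 (and, redundantly, in
   every other element).  */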
12684 void
12685 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12686 rtx (*reduc) (rtx, rtx, rtx))
12688 machine_mode inner = GET_MODE_INNER (mode);
12689 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12690 rtx tmpsum = op1;
12692 for (i = parts / 2; i >= 1; i /= 2)
12694 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12695 emit_insn (reduc (dest, tmpsum, tmpsum));
12696 tmpsum = dest;
12700 /* If VALS is a vector constant that can be loaded into a register
12701 using VDUP, generate instructions to do so and return an RTX to
12702 assign to the register. Otherwise return NULL_RTX. */
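/* For example (illustrative): the V4SImode constant {5, 5, 5, 5} is
   handled here by copying 5 into a core register and returning
   (vec_duplicate:V4SI (reg)), which the move patterns can emit as a
   single vdup.32 rather than a constant-pool load.  */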
12704 static rtx
12705 neon_vdup_constant (rtx vals)
12707 machine_mode mode = GET_MODE (vals);
12708 machine_mode inner_mode = GET_MODE_INNER (mode);
12709 int n_elts = GET_MODE_NUNITS (mode);
12710 bool all_same = true;
12711 rtx x;
12712 int i;
12714 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12715 return NULL_RTX;
12717 for (i = 0; i < n_elts; ++i)
12719 x = XVECEXP (vals, 0, i);
12720 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12721 all_same = false;
12724 if (!all_same)
12725 /* The elements are not all the same. We could handle repeating
12726 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12727 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12728 vdup.i16). */
12729 return NULL_RTX;
12731 /* We can load this constant by using VDUP and a constant in a
12732 single ARM register. This will be cheaper than a vector
12733 load. */
12735 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12736 return gen_rtx_VEC_DUPLICATE (mode, x);
12739 /* Generate code to load VALS, which is a PARALLEL containing only
12740 constants (for vec_init) or CONST_VECTOR, efficiently into a
12741 register. Returns an RTX to copy into the register, or NULL_RTX
12742 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12745 neon_make_constant (rtx vals)
12747 machine_mode mode = GET_MODE (vals);
12748 rtx target;
12749 rtx const_vec = NULL_RTX;
12750 int n_elts = GET_MODE_NUNITS (mode);
12751 int n_const = 0;
12752 int i;
12754 if (GET_CODE (vals) == CONST_VECTOR)
12755 const_vec = vals;
12756 else if (GET_CODE (vals) == PARALLEL)
12758 /* A CONST_VECTOR must contain only CONST_INTs and
12759 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12760 Only store valid constants in a CONST_VECTOR. */
12761 for (i = 0; i < n_elts; ++i)
12763 rtx x = XVECEXP (vals, 0, i);
12764 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12765 n_const++;
12767 if (n_const == n_elts)
12768 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12770 else
12771 gcc_unreachable ();
12773 if (const_vec != NULL
12774 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12775 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12776 return const_vec;
12777 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12778 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12779 pipeline cycle; creating the constant takes one or two ARM
12780 pipeline cycles. */
12781 return target;
12782 else if (const_vec != NULL_RTX)
12783 /* Load from constant pool. On Cortex-A8 this takes two cycles
12784 (for either double or quad vectors). We can not take advantage
12785 of single-cycle VLD1 because we need a PC-relative addressing
12786 mode. */
12787 return const_vec;
12788 else
12789 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12790 We can not construct an initializer. */
12791 return NULL_RTX;
12794 /* Initialize vector TARGET to VALS. */
12796 void
12797 neon_expand_vector_init (rtx target, rtx vals)
12799 machine_mode mode = GET_MODE (target);
12800 machine_mode inner_mode = GET_MODE_INNER (mode);
12801 int n_elts = GET_MODE_NUNITS (mode);
12802 int n_var = 0, one_var = -1;
12803 bool all_same = true;
12804 rtx x, mem;
12805 int i;
12807 for (i = 0; i < n_elts; ++i)
12809 x = XVECEXP (vals, 0, i);
12810 if (!CONSTANT_P (x))
12811 ++n_var, one_var = i;
12813 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12814 all_same = false;
12817 if (n_var == 0)
12819 rtx constant = neon_make_constant (vals);
12820 if (constant != NULL_RTX)
12822 emit_move_insn (target, constant);
12823 return;
12827 /* Splat a single non-constant element if we can. */
12828 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12830 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12831 emit_insn (gen_rtx_SET (VOIDmode, target,
12832 gen_rtx_VEC_DUPLICATE (mode, x)));
12833 return;
12836 /* One field is non-constant. Load constant then overwrite varying
12837 field. This is more efficient than using the stack. */
12838 if (n_var == 1)
12840 rtx copy = copy_rtx (vals);
12841 rtx index = GEN_INT (one_var);
12843 /* Load constant part of vector, substitute neighboring value for
12844 varying element. */
12845 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12846 neon_expand_vector_init (target, copy);
12848 /* Insert variable. */
12849 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12850 switch (mode)
12852 case V8QImode:
12853 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12854 break;
12855 case V16QImode:
12856 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12857 break;
12858 case V4HImode:
12859 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12860 break;
12861 case V8HImode:
12862 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12863 break;
12864 case V2SImode:
12865 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12866 break;
12867 case V4SImode:
12868 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12869 break;
12870 case V2SFmode:
12871 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12872 break;
12873 case V4SFmode:
12874 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12875 break;
12876 case V2DImode:
12877 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12878 break;
12879 default:
12880 gcc_unreachable ();
12882 return;
12885 /* Construct the vector in memory one field at a time
12886 and load the whole vector. */
12887 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12888 for (i = 0; i < n_elts; i++)
12889 emit_move_insn (adjust_address_nv (mem, inner_mode,
12890 i * GET_MODE_SIZE (inner_mode)),
12891 XVECEXP (vals, 0, i));
12892 emit_move_insn (target, mem);
12895 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12896 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12897 reported source locations are bogus. */
12899 static void
12900 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12901 const char *err)
12903 HOST_WIDE_INT lane;
12905 gcc_assert (CONST_INT_P (operand));
12907 lane = INTVAL (operand);
12909 if (lane < low || lane >= high)
12910 error (err);
12913 /* Bounds-check lanes. */
12915 void
12916 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12918 bounds_check (operand, low, high, "lane out of range");
12921 /* Bounds-check constants. */
12923 void
12924 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12926 bounds_check (operand, low, high, "constant out of range");
12929 HOST_WIDE_INT
12930 neon_element_bits (machine_mode mode)
12932 if (mode == DImode)
12933 return GET_MODE_BITSIZE (mode);
12934 else
12935 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12939 /* Predicates for `match_operand' and `match_operator'. */
12941 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12942 WB is true if full writeback address modes are allowed and is false
12943 if limited writeback address modes (POST_INC and PRE_DEC) are
12944 allowed. */
12947 arm_coproc_mem_operand (rtx op, bool wb)
12949 rtx ind;
12951 /* Reject eliminable registers. */
12952 if (! (reload_in_progress || reload_completed || lra_in_progress)
12953 && ( reg_mentioned_p (frame_pointer_rtx, op)
12954 || reg_mentioned_p (arg_pointer_rtx, op)
12955 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12956 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12957 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12958 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12959 return FALSE;
12961 /* Constants are converted into offsets from labels. */
12962 if (!MEM_P (op))
12963 return FALSE;
12965 ind = XEXP (op, 0);
12967 if (reload_completed
12968 && (GET_CODE (ind) == LABEL_REF
12969 || (GET_CODE (ind) == CONST
12970 && GET_CODE (XEXP (ind, 0)) == PLUS
12971 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12972 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12973 return TRUE;
12975 /* Match: (mem (reg)). */
12976 if (REG_P (ind))
12977 return arm_address_register_rtx_p (ind, 0);
12979 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12980 acceptable in any case (subject to verification by
12981 arm_address_register_rtx_p). We need WB to be true to accept
12982 PRE_INC and POST_DEC. */
12983 if (GET_CODE (ind) == POST_INC
12984 || GET_CODE (ind) == PRE_DEC
12985 || (wb
12986 && (GET_CODE (ind) == PRE_INC
12987 || GET_CODE (ind) == POST_DEC)))
12988 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12990 if (wb
12991 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12992 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12993 && GET_CODE (XEXP (ind, 1)) == PLUS
12994 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12995 ind = XEXP (ind, 1);
12997 /* Match:
12998 (plus (reg)
12999 (const)). */
13000 if (GET_CODE (ind) == PLUS
13001 && REG_P (XEXP (ind, 0))
13002 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13003 && CONST_INT_P (XEXP (ind, 1))
13004 && INTVAL (XEXP (ind, 1)) > -1024
13005 && INTVAL (XEXP (ind, 1)) < 1024
13006 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13007 return TRUE;
13009 return FALSE;
13012 /* Return TRUE if OP is a memory operand which we can load or store a vector
13013 to/from. TYPE is one of the following values:
13014 0 - Vector load/store (vldr)
13015 1 - Core registers (ldm)
13016 2 - Element/structure loads (vld1)
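/* For example (illustrative register numbers): with TYPE == 2 an
   address of the form (post_modify (reg r4) (plus (reg r4) (reg r5)))
   is accepted below, matching the "vld1.8 {d0}, [r4], r5"
   post-increment-by-register form, whereas TYPE == 0 instead allows
   the small constant offsets usable by vldr.  */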
13019 neon_vector_mem_operand (rtx op, int type, bool strict)
13021 rtx ind;
13023 /* Reject eliminable registers. */
13024 if (! (reload_in_progress || reload_completed)
13025 && ( reg_mentioned_p (frame_pointer_rtx, op)
13026 || reg_mentioned_p (arg_pointer_rtx, op)
13027 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13028 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13029 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13030 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13031 return !strict;
13033 /* Constants are converted into offsets from labels. */
13034 if (!MEM_P (op))
13035 return FALSE;
13037 ind = XEXP (op, 0);
13039 if (reload_completed
13040 && (GET_CODE (ind) == LABEL_REF
13041 || (GET_CODE (ind) == CONST
13042 && GET_CODE (XEXP (ind, 0)) == PLUS
13043 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13044 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13045 return TRUE;
13047 /* Match: (mem (reg)). */
13048 if (REG_P (ind))
13049 return arm_address_register_rtx_p (ind, 0);
13051 /* Allow post-increment with Neon registers. */
13052 if ((type != 1 && GET_CODE (ind) == POST_INC)
13053 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13054 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13056 /* Allow post-increment by register for VLDn */
13057 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13058 && GET_CODE (XEXP (ind, 1)) == PLUS
13059 && REG_P (XEXP (XEXP (ind, 1), 1)))
13060 return true;
13062 /* Match:
13063 (plus (reg)
13064 (const)). */
13065 if (type == 0
13066 && GET_CODE (ind) == PLUS
13067 && REG_P (XEXP (ind, 0))
13068 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13069 && CONST_INT_P (XEXP (ind, 1))
13070 && INTVAL (XEXP (ind, 1)) > -1024
13071 /* For quad modes, we restrict the constant offset to be slightly less
13072 than what the instruction format permits. We have no such constraint
13073 on double mode offsets. (This must match arm_legitimate_index_p.) */
13074 && (INTVAL (XEXP (ind, 1))
13075 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13076 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13077 return TRUE;
13079 return FALSE;
13082 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13083 type. */
13085 neon_struct_mem_operand (rtx op)
13087 rtx ind;
13089 /* Reject eliminable registers. */
13090 if (! (reload_in_progress || reload_completed)
13091 && ( reg_mentioned_p (frame_pointer_rtx, op)
13092 || reg_mentioned_p (arg_pointer_rtx, op)
13093 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13094 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13095 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13096 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13097 return FALSE;
13099 /* Constants are converted into offsets from labels. */
13100 if (!MEM_P (op))
13101 return FALSE;
13103 ind = XEXP (op, 0);
13105 if (reload_completed
13106 && (GET_CODE (ind) == LABEL_REF
13107 || (GET_CODE (ind) == CONST
13108 && GET_CODE (XEXP (ind, 0)) == PLUS
13109 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13110 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13111 return TRUE;
13113 /* Match: (mem (reg)). */
13114 if (REG_P (ind))
13115 return arm_address_register_rtx_p (ind, 0);
13117 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13118 if (GET_CODE (ind) == POST_INC
13119 || GET_CODE (ind) == PRE_DEC)
13120 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13122 return FALSE;
13125 /* Return true if X is a register that will be eliminated later on. */
13127 arm_eliminable_register (rtx x)
13129 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13130 || REGNO (x) == ARG_POINTER_REGNUM
13131 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13132 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13135 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13136 coprocessor registers. Otherwise return NO_REGS. */
13138 enum reg_class
13139 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13141 if (mode == HFmode)
13143 if (!TARGET_NEON_FP16)
13144 return GENERAL_REGS;
13145 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13146 return NO_REGS;
13147 return GENERAL_REGS;
13150 /* The neon move patterns handle all legitimate vector and struct
13151 addresses. */
13152 if (TARGET_NEON
13153 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13154 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13155 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13156 || VALID_NEON_STRUCT_MODE (mode)))
13157 return NO_REGS;
13159 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13160 return NO_REGS;
13162 return GENERAL_REGS;
13165 /* Values which must be returned in the most-significant end of the return
13166 register. */
13168 static bool
13169 arm_return_in_msb (const_tree valtype)
13171 return (TARGET_AAPCS_BASED
13172 && BYTES_BIG_ENDIAN
13173 && (AGGREGATE_TYPE_P (valtype)
13174 || TREE_CODE (valtype) == COMPLEX_TYPE
13175 || FIXED_POINT_TYPE_P (valtype)));
13178 /* Return TRUE if X references a SYMBOL_REF. */
13180 symbol_mentioned_p (rtx x)
13182 const char * fmt;
13183 int i;
13185 if (GET_CODE (x) == SYMBOL_REF)
13186 return 1;
13188 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13189 are constant offsets, not symbols. */
13190 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13191 return 0;
13193 fmt = GET_RTX_FORMAT (GET_CODE (x));
13195 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13197 if (fmt[i] == 'E')
13199 int j;
13201 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13202 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13203 return 1;
13205 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13206 return 1;
13209 return 0;
13212 /* Return TRUE if X references a LABEL_REF. */
13214 label_mentioned_p (rtx x)
13216 const char * fmt;
13217 int i;
13219 if (GET_CODE (x) == LABEL_REF)
13220 return 1;
13222 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13223 instruction, but they are constant offsets, not symbols. */
13224 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13225 return 0;
13227 fmt = GET_RTX_FORMAT (GET_CODE (x));
13228 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13230 if (fmt[i] == 'E')
13232 int j;
13234 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13235 if (label_mentioned_p (XVECEXP (x, i, j)))
13236 return 1;
13238 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13239 return 1;
13242 return 0;
13246 tls_mentioned_p (rtx x)
13248 switch (GET_CODE (x))
13250 case CONST:
13251 return tls_mentioned_p (XEXP (x, 0));
13253 case UNSPEC:
13254 if (XINT (x, 1) == UNSPEC_TLS)
13255 return 1;
13257 default:
13258 return 0;
13262 /* Must not copy any rtx that uses a pc-relative address. */
13264 static bool
13265 arm_cannot_copy_insn_p (rtx_insn *insn)
13267 /* The tls call insn cannot be copied, as it is paired with a data
13268 word. */
13269 if (recog_memoized (insn) == CODE_FOR_tlscall)
13270 return true;
13272 subrtx_iterator::array_type array;
13273 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13275 const_rtx x = *iter;
13276 if (GET_CODE (x) == UNSPEC
13277 && (XINT (x, 1) == UNSPEC_PIC_BASE
13278 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13279 return true;
13281 return false;
13284 enum rtx_code
13285 minmax_code (rtx x)
13287 enum rtx_code code = GET_CODE (x);
13289 switch (code)
13291 case SMAX:
13292 return GE;
13293 case SMIN:
13294 return LE;
13295 case UMIN:
13296 return LEU;
13297 case UMAX:
13298 return GEU;
13299 default:
13300 gcc_unreachable ();
13304 /* Match pair of min/max operators that can be implemented via usat/ssat. */
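/* For example (illustrative): clamping to [0, 255], i.e.
   smin (smax (x, 0), 255), matches with *MASK = 8 and
   *SIGNED_SAT = false ("usat #8"), while clamping to [-256, 255]
   gives *MASK = 9 and *SIGNED_SAT = true ("ssat #9").  */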
13306 bool
13307 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13308 int *mask, bool *signed_sat)
13310 /* The high bound must be a power of two minus one. */
13311 int log = exact_log2 (INTVAL (hi_bound) + 1);
13312 if (log == -1)
13313 return false;
13315 /* The low bound is either zero (for usat) or one less than the
13316 negation of the high bound (for ssat). */
13317 if (INTVAL (lo_bound) == 0)
13319 if (mask)
13320 *mask = log;
13321 if (signed_sat)
13322 *signed_sat = false;
13324 return true;
13327 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13329 if (mask)
13330 *mask = log + 1;
13331 if (signed_sat)
13332 *signed_sat = true;
13334 return true;
13337 return false;
13340 /* Return 1 if memory locations are adjacent. */
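/* For example (illustrative): (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) are adjacent; on cores with load
   delay slots (arm_ld_sched) the pairing is further restricted to the
   optimize-for-size case with one of the offsets being 0 or 4.  */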
13342 adjacent_mem_locations (rtx a, rtx b)
13344 /* We don't guarantee to preserve the order of these memory refs. */
13345 if (volatile_refs_p (a) || volatile_refs_p (b))
13346 return 0;
13348 if ((REG_P (XEXP (a, 0))
13349 || (GET_CODE (XEXP (a, 0)) == PLUS
13350 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13351 && (REG_P (XEXP (b, 0))
13352 || (GET_CODE (XEXP (b, 0)) == PLUS
13353 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13355 HOST_WIDE_INT val0 = 0, val1 = 0;
13356 rtx reg0, reg1;
13357 int val_diff;
13359 if (GET_CODE (XEXP (a, 0)) == PLUS)
13361 reg0 = XEXP (XEXP (a, 0), 0);
13362 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13364 else
13365 reg0 = XEXP (a, 0);
13367 if (GET_CODE (XEXP (b, 0)) == PLUS)
13369 reg1 = XEXP (XEXP (b, 0), 0);
13370 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13372 else
13373 reg1 = XEXP (b, 0);
13375 /* Don't accept any offset that will require multiple
13376 instructions to handle, since this would cause the
13377 arith_adjacentmem pattern to output an overlong sequence. */
13378 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13379 return 0;
13381 /* Don't allow an eliminable register: register elimination can make
13382 the offset too large. */
13383 if (arm_eliminable_register (reg0))
13384 return 0;
13386 val_diff = val1 - val0;
13388 if (arm_ld_sched)
13390 /* If the target has load delay slots, then there's no benefit
13391 to using an ldm instruction unless the offset is zero and
13392 we are optimizing for size. */
13393 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13394 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13395 && (val_diff == 4 || val_diff == -4));
13398 return ((REGNO (reg0) == REGNO (reg1))
13399 && (val_diff == 4 || val_diff == -4));
13402 return 0;
13405 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13406 for load operations, false for store operations. CONSECUTIVE is true
13407 if the register numbers in the operation must be consecutive in the register
13408 bank. RETURN_PC is true if the value is to be loaded into PC.
13409 The pattern we are trying to match for load is:
13410 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13411 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13414 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13416 where
13417 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13418 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13419 3. If consecutive is TRUE, then for kth register being loaded,
13420 REGNO (R_dk) = REGNO (R_d0) + k.
13421 The pattern for store is similar. */
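/* For example (illustrative): the two-element PARALLEL
     [(set (reg:SI r4) (mem:SI (reg:SI r0)))
      (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))]
   passes the checks below for a load in SImode and corresponds to
   "ldm r0, {r4, r5}" (for Thumb-1 the base register would also have to
   be written back or included in the register list).  */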
13422 bool
13423 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13424 bool consecutive, bool return_pc)
13426 HOST_WIDE_INT count = XVECLEN (op, 0);
13427 rtx reg, mem, addr;
13428 unsigned regno;
13429 unsigned first_regno;
13430 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13431 rtx elt;
13432 bool addr_reg_in_reglist = false;
13433 bool update = false;
13434 int reg_increment;
13435 int offset_adj;
13436 int regs_per_val;
13438 /* If not in SImode, then registers must be consecutive
13439 (e.g., VLDM instructions for DFmode). */
13440 gcc_assert ((mode == SImode) || consecutive);
13441 /* Setting return_pc for stores is illegal. */
13442 gcc_assert (!return_pc || load);
13444 /* Set up the increments and the regs per val based on the mode. */
13445 reg_increment = GET_MODE_SIZE (mode);
13446 regs_per_val = reg_increment / 4;
13447 offset_adj = return_pc ? 1 : 0;
13449 if (count <= 1
13450 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13451 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13452 return false;
13454 /* Check if this is a write-back. */
13455 elt = XVECEXP (op, 0, offset_adj);
13456 if (GET_CODE (SET_SRC (elt)) == PLUS)
13458 i++;
13459 base = 1;
13460 update = true;
13462 /* The offset adjustment must be the number of registers being
13463 popped times the size of a single register. */
13464 if (!REG_P (SET_DEST (elt))
13465 || !REG_P (XEXP (SET_SRC (elt), 0))
13466 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13467 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13468 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13469 ((count - 1 - offset_adj) * reg_increment))
13470 return false;
13473 i = i + offset_adj;
13474 base = base + offset_adj;
13475 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13476 success depends on the type: VLDM can do just one reg,
13477 LDM must do at least two. */
13478 if ((count <= i) && (mode == SImode))
13479 return false;
13481 elt = XVECEXP (op, 0, i - 1);
13482 if (GET_CODE (elt) != SET)
13483 return false;
13485 if (load)
13487 reg = SET_DEST (elt);
13488 mem = SET_SRC (elt);
13490 else
13492 reg = SET_SRC (elt);
13493 mem = SET_DEST (elt);
13496 if (!REG_P (reg) || !MEM_P (mem))
13497 return false;
13499 regno = REGNO (reg);
13500 first_regno = regno;
13501 addr = XEXP (mem, 0);
13502 if (GET_CODE (addr) == PLUS)
13504 if (!CONST_INT_P (XEXP (addr, 1)))
13505 return false;
13507 offset = INTVAL (XEXP (addr, 1));
13508 addr = XEXP (addr, 0);
13511 if (!REG_P (addr))
13512 return false;
13514 /* Don't allow SP to be loaded unless it is also the base register. It
13515 guarantees that SP is reset correctly when an LDM instruction
13516 is interrupted. Otherwise, we might end up with a corrupt stack. */
13517 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13518 return false;
13520 for (; i < count; i++)
13522 elt = XVECEXP (op, 0, i);
13523 if (GET_CODE (elt) != SET)
13524 return false;
13526 if (load)
13528 reg = SET_DEST (elt);
13529 mem = SET_SRC (elt);
13531 else
13533 reg = SET_SRC (elt);
13534 mem = SET_DEST (elt);
13537 if (!REG_P (reg)
13538 || GET_MODE (reg) != mode
13539 || REGNO (reg) <= regno
13540 || (consecutive
13541 && (REGNO (reg) !=
13542 (unsigned int) (first_regno + regs_per_val * (i - base))))
13543 /* Don't allow SP to be loaded unless it is also the base register. It
13544 guarantees that SP is reset correctly when an LDM instruction
13545 is interrupted. Otherwise, we might end up with a corrupt stack. */
13546 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13547 || !MEM_P (mem)
13548 || GET_MODE (mem) != mode
13549 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13550 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13551 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13552 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13553 offset + (i - base) * reg_increment))
13554 && (!REG_P (XEXP (mem, 0))
13555 || offset + (i - base) * reg_increment != 0)))
13556 return false;
13558 regno = REGNO (reg);
13559 if (regno == REGNO (addr))
13560 addr_reg_in_reglist = true;
13563 if (load)
13565 if (update && addr_reg_in_reglist)
13566 return false;
13568 /* For Thumb-1, the address register is always modified - either by write-back
13569 or by explicit load. If the pattern does not describe an update,
13570 then the address register must be in the list of loaded registers. */
13571 if (TARGET_THUMB1)
13572 return update || addr_reg_in_reglist;
13575 return true;
13578 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13579 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13580 instruction. ADD_OFFSET is nonzero if the base address register needs
13581 to be modified with an add instruction before we can use it. */
13583 static bool
13584 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13585 int nops, HOST_WIDE_INT add_offset)
13587 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13588 if the offset isn't small enough. The reason 2 ldrs are faster
13589 is because these ARMs are able to do more than one cache access
13590 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13591 whilst the ARM8 has a double bandwidth cache. This means that
13592 these cores can do both an instruction fetch and a data fetch in
13593 a single cycle, so the trick of calculating the address into a
13594 scratch register (one of the result regs) and then doing a load
13595 multiple actually becomes slower (and no smaller in code size).
13596 That is the transformation
13598 ldr rd1, [rbase + offset]
13599 ldr rd2, [rbase + offset + 4]
13601 to
13603 add rd1, rbase, offset
13604 ldmia rd1, {rd1, rd2}
13606 produces worse code -- '3 cycles + any stalls on rd2' instead of
13607 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13608 access per cycle, the first sequence could never complete in less
13609 than 6 cycles, whereas the ldm sequence would only take 5 and
13610 would make better use of sequential accesses if not hitting the
13611 cache.
13613 We cheat here and test 'arm_ld_sched' which we currently know to
13614 only be true for the ARM8, ARM9 and StrongARM. If this ever
13615 changes, then the test below needs to be reworked. */
13616 if (nops == 2 && arm_ld_sched && add_offset != 0)
13617 return false;
13619 /* XScale has load-store double instructions, but they have stricter
13620 alignment requirements than load-store multiple, so we cannot
13621 use them.
13623 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13624 the pipeline until completion.
13626 NREGS CYCLES
13627 1 3
13628 2 4
13629 3 5
13630 4 6
13632 An ldr instruction takes 1-3 cycles, but does not block the
13633 pipeline.
13635 NREGS CYCLES
13636 1 1-3
13637 2 2-6
13638 3 3-9
13639 4 4-12
13641 Best case ldr will always win. However, the more ldr instructions
13642 we issue, the less likely we are to be able to schedule them well.
13643 Using ldr instructions also increases code size.
13645 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13646 for counts of 3 or 4 regs. */
13647 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13648 return false;
13649 return true;
13652 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13653 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13654 an array ORDER which describes the sequence to use when accessing the
13655 offsets that produces an ascending order. In this sequence, each
13656 offset must be larger by exactly 4 than the previous one. ORDER[0]
13657 must have been filled in with the lowest offset by the caller.
13658 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13659 we use to verify that ORDER produces an ascending order of registers.
13660 Return true if it was possible to construct such an order, false if
13661 not. */
13663 static bool
13664 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13665 int *unsorted_regs)
13667 int i;
13668 for (i = 1; i < nops; i++)
13670 int j;
13672 order[i] = order[i - 1];
13673 for (j = 0; j < nops; j++)
13674 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13676 /* We must find exactly one offset that is higher than the
13677 previous one by 4. */
13678 if (order[i] != order[i - 1])
13679 return false;
13680 order[i] = j;
13682 if (order[i] == order[i - 1])
13683 return false;
13684 /* The register numbers must be ascending. */
13685 if (unsorted_regs != NULL
13686 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13687 return false;
13689 return true;
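/* Editorial sketch, not part of arm.c: a standalone model of the check that
   compute_offset_order performs.  Starting from the index of the lowest
   offset, each step must find exactly one offset that is larger by 4 than
   the previous one.  All names below are hypothetical.  */

#include <stdio.h>
#include <stdbool.h>

static bool
demo_offset_order (int nops, const long *offsets, int *order)
{
  /* order[0] must already hold the index of the lowest offset.  */
  for (int i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (int j = 0; j < nops; j++)
        if (offsets[j] == offsets[order[i - 1]] + 4)
          {
            if (order[i] != order[i - 1])
              return false;          /* Two candidates: ambiguous, give up.  */
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;                /* Nothing is exactly 4 above it.  */
    }
  return true;
}

int
main (void)
{
  long offsets[4] = { 8, 0, 4, 12 };
  int order[4] = { 1 };              /* Operand 1 holds the lowest offset.  */

  if (demo_offset_order (4, offsets, order))
    for (int i = 0; i < 4; i++)
      printf ("access %d uses operand %d (offset %ld)\n",
              i, order[i], offsets[order[i]]);
  return 0;
}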
13692 /* Used to determine in a peephole whether a sequence of load
13693 instructions can be changed into a load-multiple instruction.
13694 NOPS is the number of separate load instructions we are examining. The
13695 first NOPS entries in OPERANDS are the destination registers, the
13696 next NOPS entries are memory operands. If this function is
13697 successful, *BASE is set to the common base register of the memory
13698 accesses; *LOAD_OFFSET is set to the first memory location's offset
13699 from that base register.
13700 REGS is an array filled in with the destination register numbers.
13701 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13702 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13703 the sequence of registers in REGS matches the loads from ascending memory
13704 locations, and the function verifies that the register numbers are
13705 themselves ascending. If CHECK_REGS is false, the register numbers
13706 are stored in the order they are found in the operands. */
13707 static int
13708 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13709 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13711 int unsorted_regs[MAX_LDM_STM_OPS];
13712 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13713 int order[MAX_LDM_STM_OPS];
13714 rtx base_reg_rtx = NULL;
13715 int base_reg = -1;
13716 int i, ldm_case;
13718 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13719 easily extended if required. */
13720 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13722 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13724 /* Loop over the operands and check that the memory references are
13725 suitable (i.e. immediate offsets from the same base register). At
13726 the same time, extract the target register, and the memory
13727 offsets. */
13728 for (i = 0; i < nops; i++)
13730 rtx reg;
13731 rtx offset;
13733 /* Convert a subreg of a mem into the mem itself. */
13734 if (GET_CODE (operands[nops + i]) == SUBREG)
13735 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13737 gcc_assert (MEM_P (operands[nops + i]));
13739 /* Don't reorder volatile memory references; it doesn't seem worth
13740 looking for the case where the order is ok anyway. */
13741 if (MEM_VOLATILE_P (operands[nops + i]))
13742 return 0;
13744 offset = const0_rtx;
13746 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13747 || (GET_CODE (reg) == SUBREG
13748 && REG_P (reg = SUBREG_REG (reg))))
13749 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13750 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13751 || (GET_CODE (reg) == SUBREG
13752 && REG_P (reg = SUBREG_REG (reg))))
13753 && (CONST_INT_P (offset
13754 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13756 if (i == 0)
13758 base_reg = REGNO (reg);
13759 base_reg_rtx = reg;
13760 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13761 return 0;
13763 else if (base_reg != (int) REGNO (reg))
13764 /* Not addressed from the same base register. */
13765 return 0;
13767 unsorted_regs[i] = (REG_P (operands[i])
13768 ? REGNO (operands[i])
13769 : REGNO (SUBREG_REG (operands[i])));
13771 /* If it isn't an integer register, or if it overwrites the
13772 base register but isn't the last insn in the list, then
13773 we can't do this. */
13774 if (unsorted_regs[i] < 0
13775 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13776 || unsorted_regs[i] > 14
13777 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13778 return 0;
13780 /* Don't allow SP to be loaded unless it is also the base
13781 register. It guarantees that SP is reset correctly when
13782 an LDM instruction is interrupted. Otherwise, we might
13783 end up with a corrupt stack. */
13784 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13785 return 0;
13787 unsorted_offsets[i] = INTVAL (offset);
13788 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13789 order[0] = i;
13791 else
13792 /* Not a suitable memory address. */
13793 return 0;
13796 /* All the useful information has now been extracted from the
13797 operands into unsorted_regs and unsorted_offsets; additionally,
13798 order[0] has been set to the lowest offset in the list. Sort
13799 the offsets into order, verifying that they are adjacent, and
13800 check that the register numbers are ascending. */
13801 if (!compute_offset_order (nops, unsorted_offsets, order,
13802 check_regs ? unsorted_regs : NULL))
13803 return 0;
13805 if (saved_order)
13806 memcpy (saved_order, order, sizeof order);
13808 if (base)
13810 *base = base_reg;
13812 for (i = 0; i < nops; i++)
13813 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13815 *load_offset = unsorted_offsets[order[0]];
13818 if (TARGET_THUMB1
13819 && !peep2_reg_dead_p (nops, base_reg_rtx))
13820 return 0;
13822 if (unsorted_offsets[order[0]] == 0)
13823 ldm_case = 1; /* ldmia */
13824 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13825 ldm_case = 2; /* ldmib */
13826 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13827 ldm_case = 3; /* ldmda */
13828 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13829 ldm_case = 4; /* ldmdb */
13830 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13831 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13832 ldm_case = 5;
13833 else
13834 return 0;
13836 if (!multiple_operation_profitable_p (false, nops,
13837 ldm_case == 5
13838 ? unsorted_offsets[order[0]] : 0))
13839 return 0;
13841 return ldm_case;
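/* Editorial sketch, not part of arm.c: how the lowest and highest offsets
   select the ldm_case addressing mode above.  Assumes ARM state, where all
   four modes exist; Thumb-2 additionally allows ldmdb, while Thumb-1 only
   gets the ldmia forms (cases 1 and 5).  */

#include <stdio.h>

static const char *
demo_ldm_case (long first_offset, long last_offset)
{
  if (first_offset == 0)
    return "ldmia";                /* Case 1: base points at the first word.  */
  if (first_offset == 4)
    return "ldmib";                /* Case 2: base is one word below the block.  */
  if (last_offset == 0)
    return "ldmda";                /* Case 3: base points at the last word.  */
  if (last_offset == -4)
    return "ldmdb";                /* Case 4: base is one word above the block.  */
  return "add base, then ldmia";   /* Case 5, if the offset is a valid immediate.  */
}

int
main (void)
{
  printf ("%s\n", demo_ldm_case (0, 12));    /* ldmia  */
  printf ("%s\n", demo_ldm_case (-16, -4));  /* ldmdb  */
  return 0;
}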
13844 /* Used to determine in a peephole whether a sequence of store instructions can
13845 be changed into a store-multiple instruction.
13846 NOPS is the number of separate store instructions we are examining.
13847 NOPS_TOTAL is the total number of instructions recognized by the peephole
13848 pattern.
13849 The first NOPS entries in OPERANDS are the source registers, the next
13850 NOPS entries are memory operands. If this function is successful, *BASE is
13851 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13852 to the first memory location's offset from that base register. REGS is an
13853 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13854 likewise filled with the corresponding rtx's.
13855 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13856 numbers to an ascending order of stores.
13857 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13858 from ascending memory locations, and the function verifies that the register
13859 numbers are themselves ascending. If CHECK_REGS is false, the register
13860 numbers are stored in the order they are found in the operands. */
13861 static int
13862 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13863 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13864 HOST_WIDE_INT *load_offset, bool check_regs)
13866 int unsorted_regs[MAX_LDM_STM_OPS];
13867 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13868 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13869 int order[MAX_LDM_STM_OPS];
13870 int base_reg = -1;
13871 rtx base_reg_rtx = NULL;
13872 int i, stm_case;
13874 /* Write back of base register is currently only supported for Thumb 1. */
13875 int base_writeback = TARGET_THUMB1;
13877 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13878 easily extended if required. */
13879 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13881 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13883 /* Loop over the operands and check that the memory references are
13884 suitable (i.e. immediate offsets from the same base register). At
13885 the same time, extract the target register, and the memory
13886 offsets. */
13887 for (i = 0; i < nops; i++)
13889 rtx reg;
13890 rtx offset;
13892 /* Convert a subreg of a mem into the mem itself. */
13893 if (GET_CODE (operands[nops + i]) == SUBREG)
13894 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13896 gcc_assert (MEM_P (operands[nops + i]));
13898 /* Don't reorder volatile memory references; it doesn't seem worth
13899 looking for the case where the order is ok anyway. */
13900 if (MEM_VOLATILE_P (operands[nops + i]))
13901 return 0;
13903 offset = const0_rtx;
13905 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13906 || (GET_CODE (reg) == SUBREG
13907 && REG_P (reg = SUBREG_REG (reg))))
13908 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13909 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13910 || (GET_CODE (reg) == SUBREG
13911 && REG_P (reg = SUBREG_REG (reg))))
13912 && (CONST_INT_P (offset
13913 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13915 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13916 ? operands[i] : SUBREG_REG (operands[i]));
13917 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13919 if (i == 0)
13921 base_reg = REGNO (reg);
13922 base_reg_rtx = reg;
13923 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13924 return 0;
13926 else if (base_reg != (int) REGNO (reg))
13927 /* Not addressed from the same base register. */
13928 return 0;
13930 /* If it isn't an integer register, then we can't do this. */
13931 if (unsorted_regs[i] < 0
13932 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13933 /* The effects are unpredictable if the base register is
13934 both updated and stored. */
13935 || (base_writeback && unsorted_regs[i] == base_reg)
13936 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13937 || unsorted_regs[i] > 14)
13938 return 0;
13940 unsorted_offsets[i] = INTVAL (offset);
13941 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13942 order[0] = i;
13944 else
13945 /* Not a suitable memory address. */
13946 return 0;
13949 /* All the useful information has now been extracted from the
13950 operands into unsorted_regs and unsorted_offsets; additionally,
13951 order[0] has been set to the lowest offset in the list. Sort
13952 the offsets into order, verifying that they are adjacent, and
13953 check that the register numbers are ascending. */
13954 if (!compute_offset_order (nops, unsorted_offsets, order,
13955 check_regs ? unsorted_regs : NULL))
13956 return 0;
13958 if (saved_order)
13959 memcpy (saved_order, order, sizeof order);
13961 if (base)
13963 *base = base_reg;
13965 for (i = 0; i < nops; i++)
13967 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13968 if (reg_rtxs)
13969 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13972 *load_offset = unsorted_offsets[order[0]];
13975 if (TARGET_THUMB1
13976 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13977 return 0;
13979 if (unsorted_offsets[order[0]] == 0)
13980 stm_case = 1; /* stmia */
13981 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13982 stm_case = 2; /* stmib */
13983 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13984 stm_case = 3; /* stmda */
13985 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13986 stm_case = 4; /* stmdb */
13987 else
13988 return 0;
13990 if (!multiple_operation_profitable_p (false, nops, 0))
13991 return 0;
13993 return stm_case;
13996 /* Routines for use in generating RTL. */
13998 /* Generate a load-multiple instruction. COUNT is the number of loads in
13999 the instruction; REGS and MEMS are arrays containing the operands.
14000 BASEREG is the base register to be used in addressing the memory operands.
14001 WBACK_OFFSET is nonzero if the instruction should update the base
14002 register. */
14004 static rtx
14005 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14006 HOST_WIDE_INT wback_offset)
14008 int i = 0, j;
14009 rtx result;
14011 if (!multiple_operation_profitable_p (false, count, 0))
14013 rtx seq;
14015 start_sequence ();
14017 for (i = 0; i < count; i++)
14018 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14020 if (wback_offset != 0)
14021 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14023 seq = get_insns ();
14024 end_sequence ();
14026 return seq;
14029 result = gen_rtx_PARALLEL (VOIDmode,
14030 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14031 if (wback_offset != 0)
14033 XVECEXP (result, 0, 0)
14034 = gen_rtx_SET (VOIDmode, basereg,
14035 plus_constant (Pmode, basereg, wback_offset));
14036 i = 1;
14037 count++;
14040 for (j = 0; i < count; i++, j++)
14041 XVECEXP (result, 0, i)
14042 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
14044 return result;
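/* Editorial note, not part of arm.c: for count == 2 with a write-back offset
   of 8, the PARALLEL built above has roughly the shape

     (parallel
       [(set (reg base) (plus (reg base) (const_int 8)))
        (set (reg r0) (mem (reg base)))
        (set (reg r1) (mem (plus (reg base) (const_int 4))))])

   i.e. the optional base-register update occupies element 0 and the loads
   follow, which is the layout that ldm_stm_operation_p (earlier in this
   file) checks for when matching load/store-multiple patterns.  */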
14047 /* Generate a store-multiple instruction. COUNT is the number of stores in
14048 the instruction; REGS and MEMS are arrays containing the operands.
14049 BASEREG is the base register to be used in addressing the memory operands.
14050 WBACK_OFFSET is nonzero if the instruction should update the base
14051 register. */
14053 static rtx
14054 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14055 HOST_WIDE_INT wback_offset)
14057 int i = 0, j;
14058 rtx result;
14060 if (GET_CODE (basereg) == PLUS)
14061 basereg = XEXP (basereg, 0);
14063 if (!multiple_operation_profitable_p (false, count, 0))
14065 rtx seq;
14067 start_sequence ();
14069 for (i = 0; i < count; i++)
14070 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14072 if (wback_offset != 0)
14073 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14075 seq = get_insns ();
14076 end_sequence ();
14078 return seq;
14081 result = gen_rtx_PARALLEL (VOIDmode,
14082 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14083 if (wback_offset != 0)
14085 XVECEXP (result, 0, 0)
14086 = gen_rtx_SET (VOIDmode, basereg,
14087 plus_constant (Pmode, basereg, wback_offset));
14088 i = 1;
14089 count++;
14092 for (j = 0; i < count; i++, j++)
14093 XVECEXP (result, 0, i)
14094 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14096 return result;
14099 /* Generate either a load-multiple or a store-multiple instruction. This
14100 function can be used in situations where we can start with a single MEM
14101 rtx and adjust its address upwards.
14102 COUNT is the number of operations in the instruction, not counting a
14103 possible update of the base register. REGS is an array containing the
14104 register operands.
14105 BASEREG is the base register to be used in addressing the memory operands,
14106 which are constructed from BASEMEM.
14107 WRITE_BACK specifies whether the generated instruction should include an
14108 update of the base register.
14109 OFFSETP is used to pass an offset to and from this function; this offset
14110 is not used when constructing the address (instead BASEMEM should have an
14111 appropriate offset in its address), it is used only for setting
14112 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14114 static rtx
14115 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14116 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14118 rtx mems[MAX_LDM_STM_OPS];
14119 HOST_WIDE_INT offset = *offsetp;
14120 int i;
14122 gcc_assert (count <= MAX_LDM_STM_OPS);
14124 if (GET_CODE (basereg) == PLUS)
14125 basereg = XEXP (basereg, 0);
14127 for (i = 0; i < count; i++)
14129 rtx addr = plus_constant (Pmode, basereg, i * 4);
14130 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14131 offset += 4;
14134 if (write_back)
14135 *offsetp = offset;
14137 if (is_load)
14138 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14139 write_back ? 4 * count : 0);
14140 else
14141 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14142 write_back ? 4 * count : 0);
14145 rtx
14146 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14147 rtx basemem, HOST_WIDE_INT *offsetp)
14149 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14150 offsetp);
14153 rtx
14154 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14155 rtx basemem, HOST_WIDE_INT *offsetp)
14157 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14158 offsetp);
14161 /* Called from a peephole2 expander to turn a sequence of loads into an
14162 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14163 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14164 is true if we can reorder the registers because they are used commutatively
14165 subsequently.
14166 Returns true iff we could generate a new instruction. */
14168 bool
14169 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14171 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14172 rtx mems[MAX_LDM_STM_OPS];
14173 int i, j, base_reg;
14174 rtx base_reg_rtx;
14175 HOST_WIDE_INT offset;
14176 int write_back = FALSE;
14177 int ldm_case;
14178 rtx addr;
14180 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14181 &base_reg, &offset, !sort_regs);
14183 if (ldm_case == 0)
14184 return false;
14186 if (sort_regs)
14187 for (i = 0; i < nops - 1; i++)
14188 for (j = i + 1; j < nops; j++)
14189 if (regs[i] > regs[j])
14191 int t = regs[i];
14192 regs[i] = regs[j];
14193 regs[j] = t;
14195 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14197 if (TARGET_THUMB1)
14199 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14200 gcc_assert (ldm_case == 1 || ldm_case == 5);
14201 write_back = TRUE;
14204 if (ldm_case == 5)
14206 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14207 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14208 offset = 0;
14209 if (!TARGET_THUMB1)
14211 base_reg = regs[0];
14212 base_reg_rtx = newbase;
14216 for (i = 0; i < nops; i++)
14218 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14219 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14220 SImode, addr, 0);
14222 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14223 write_back ? offset + i * 4 : 0));
14224 return true;
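/* Editorial example, not part of arm.c: the transformation this peephole
   driver enables.  A matched pair of loads such as

       ldr   r0, [r3]
       ldr   r1, [r3, #4]

   becomes

       ldmia r3, {r0, r1}

   provided the registers can be ordered to ascend with the addresses (or
   reordered, when SORT_REGS says their later uses are commutative).  */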
14227 /* Called from a peephole2 expander to turn a sequence of stores into an
14228 STM instruction. OPERANDS are the operands found by the peephole matcher;
14229 NOPS indicates how many separate stores we are trying to combine.
14230 Returns true iff we could generate a new instruction. */
14232 bool
14233 gen_stm_seq (rtx *operands, int nops)
14235 int i;
14236 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14237 rtx mems[MAX_LDM_STM_OPS];
14238 int base_reg;
14239 rtx base_reg_rtx;
14240 HOST_WIDE_INT offset;
14241 int write_back = FALSE;
14242 int stm_case;
14243 rtx addr;
14244 bool base_reg_dies;
14246 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14247 mem_order, &base_reg, &offset, true);
14249 if (stm_case == 0)
14250 return false;
14252 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14254 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14255 if (TARGET_THUMB1)
14257 gcc_assert (base_reg_dies);
14258 write_back = TRUE;
14261 if (stm_case == 5)
14263 gcc_assert (base_reg_dies);
14264 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14265 offset = 0;
14268 addr = plus_constant (Pmode, base_reg_rtx, offset);
14270 for (i = 0; i < nops; i++)
14272 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14273 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14274 SImode, addr, 0);
14276 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14277 write_back ? offset + i * 4 : 0));
14278 return true;
14281 /* Called from a peephole2 expander to turn a sequence of stores that are
14282 preceded by constant loads into an STM instruction. OPERANDS are the
14283 operands found by the peephole matcher; NOPS indicates how many
14284 separate stores we are trying to combine; there are 2 * NOPS
14285 instructions in the peephole.
14286 Returns true iff we could generate a new instruction. */
14288 bool
14289 gen_const_stm_seq (rtx *operands, int nops)
14291 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14292 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14293 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14294 rtx mems[MAX_LDM_STM_OPS];
14295 int base_reg;
14296 rtx base_reg_rtx;
14297 HOST_WIDE_INT offset;
14298 int write_back = FALSE;
14299 int stm_case;
14300 rtx addr;
14301 bool base_reg_dies;
14302 int i, j;
14303 HARD_REG_SET allocated;
14305 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14306 mem_order, &base_reg, &offset, false);
14308 if (stm_case == 0)
14309 return false;
14311 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14313 /* If the same register is used more than once, try to find a free
14314 register. */
14315 CLEAR_HARD_REG_SET (allocated);
14316 for (i = 0; i < nops; i++)
14318 for (j = i + 1; j < nops; j++)
14319 if (regs[i] == regs[j])
14321 rtx t = peep2_find_free_register (0, nops * 2,
14322 TARGET_THUMB1 ? "l" : "r",
14323 SImode, &allocated);
14324 if (t == NULL_RTX)
14325 return false;
14326 reg_rtxs[i] = t;
14327 regs[i] = REGNO (t);
14331 /* Compute an ordering that maps the register numbers to an ascending
14332 sequence. */
14333 reg_order[0] = 0;
14334 for (i = 0; i < nops; i++)
14335 if (regs[i] < regs[reg_order[0]])
14336 reg_order[0] = i;
14338 for (i = 1; i < nops; i++)
14340 int this_order = reg_order[i - 1];
14341 for (j = 0; j < nops; j++)
14342 if (regs[j] > regs[reg_order[i - 1]]
14343 && (this_order == reg_order[i - 1]
14344 || regs[j] < regs[this_order]))
14345 this_order = j;
14346 reg_order[i] = this_order;
14349 /* Ensure that registers that must be live after the instruction end
14350 up with the correct value. */
14351 for (i = 0; i < nops; i++)
14353 int this_order = reg_order[i];
14354 if ((this_order != mem_order[i]
14355 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14356 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14357 return false;
14360 /* Load the constants. */
14361 for (i = 0; i < nops; i++)
14363 rtx op = operands[2 * nops + mem_order[i]];
14364 sorted_regs[i] = regs[reg_order[i]];
14365 emit_move_insn (reg_rtxs[reg_order[i]], op);
14368 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14370 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14371 if (TARGET_THUMB1)
14373 gcc_assert (base_reg_dies);
14374 write_back = TRUE;
14377 if (stm_case == 5)
14379 gcc_assert (base_reg_dies);
14380 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14381 offset = 0;
14384 addr = plus_constant (Pmode, base_reg_rtx, offset);
14386 for (i = 0; i < nops; i++)
14388 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14389 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14390 SImode, addr, 0);
14392 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14393 write_back ? offset + i * 4 : 0));
14394 return true;
14397 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14398 unaligned copies on processors which support unaligned semantics for those
14399 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14400 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14401 An interleave factor of 1 (the minimum) will perform no interleaving.
14402 Load/store multiple are used for aligned addresses where possible. */
14404 static void
14405 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14406 HOST_WIDE_INT length,
14407 unsigned int interleave_factor)
14409 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14410 int *regnos = XALLOCAVEC (int, interleave_factor);
14411 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14412 HOST_WIDE_INT i, j;
14413 HOST_WIDE_INT remaining = length, words;
14414 rtx halfword_tmp = NULL, byte_tmp = NULL;
14415 rtx dst, src;
14416 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14417 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14418 HOST_WIDE_INT srcoffset, dstoffset;
14419 HOST_WIDE_INT src_autoinc, dst_autoinc;
14420 rtx mem, addr;
14422 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14424 /* Use hard registers if we have aligned source or destination so we can use
14425 load/store multiple with contiguous registers. */
14426 if (dst_aligned || src_aligned)
14427 for (i = 0; i < interleave_factor; i++)
14428 regs[i] = gen_rtx_REG (SImode, i);
14429 else
14430 for (i = 0; i < interleave_factor; i++)
14431 regs[i] = gen_reg_rtx (SImode);
14433 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14434 src = copy_addr_to_reg (XEXP (srcbase, 0));
14436 srcoffset = dstoffset = 0;
14438 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14439 For copying the last bytes we want to subtract this offset again. */
14440 src_autoinc = dst_autoinc = 0;
14442 for (i = 0; i < interleave_factor; i++)
14443 regnos[i] = i;
14445 /* Copy BLOCK_SIZE_BYTES chunks. */
14447 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14449 /* Load words. */
14450 if (src_aligned && interleave_factor > 1)
14452 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14453 TRUE, srcbase, &srcoffset));
14454 src_autoinc += UNITS_PER_WORD * interleave_factor;
14456 else
14458 for (j = 0; j < interleave_factor; j++)
14460 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14461 - src_autoinc));
14462 mem = adjust_automodify_address (srcbase, SImode, addr,
14463 srcoffset + j * UNITS_PER_WORD);
14464 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14466 srcoffset += block_size_bytes;
14469 /* Store words. */
14470 if (dst_aligned && interleave_factor > 1)
14472 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14473 TRUE, dstbase, &dstoffset));
14474 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14476 else
14478 for (j = 0; j < interleave_factor; j++)
14480 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14481 - dst_autoinc));
14482 mem = adjust_automodify_address (dstbase, SImode, addr,
14483 dstoffset + j * UNITS_PER_WORD);
14484 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14486 dstoffset += block_size_bytes;
14489 remaining -= block_size_bytes;
14492 /* Copy any whole words left (note these aren't interleaved with any
14493 subsequent halfword/byte load/stores in the interests of simplicity). */
14495 words = remaining / UNITS_PER_WORD;
14497 gcc_assert (words < interleave_factor);
14499 if (src_aligned && words > 1)
14501 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14502 &srcoffset));
14503 src_autoinc += UNITS_PER_WORD * words;
14505 else
14507 for (j = 0; j < words; j++)
14509 addr = plus_constant (Pmode, src,
14510 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14511 mem = adjust_automodify_address (srcbase, SImode, addr,
14512 srcoffset + j * UNITS_PER_WORD);
14513 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14515 srcoffset += words * UNITS_PER_WORD;
14518 if (dst_aligned && words > 1)
14520 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14521 &dstoffset));
14522 dst_autoinc += words * UNITS_PER_WORD;
14524 else
14526 for (j = 0; j < words; j++)
14528 addr = plus_constant (Pmode, dst,
14529 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14530 mem = adjust_automodify_address (dstbase, SImode, addr,
14531 dstoffset + j * UNITS_PER_WORD);
14532 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14534 dstoffset += words * UNITS_PER_WORD;
14537 remaining -= words * UNITS_PER_WORD;
14539 gcc_assert (remaining < 4);
14541 /* Copy a halfword if necessary. */
14543 if (remaining >= 2)
14545 halfword_tmp = gen_reg_rtx (SImode);
14547 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14548 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14549 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14551 /* Either write out immediately, or delay until we've loaded the last
14552 byte, depending on interleave factor. */
14553 if (interleave_factor == 1)
14555 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14556 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14557 emit_insn (gen_unaligned_storehi (mem,
14558 gen_lowpart (HImode, halfword_tmp)));
14559 halfword_tmp = NULL;
14560 dstoffset += 2;
14563 remaining -= 2;
14564 srcoffset += 2;
14567 gcc_assert (remaining < 2);
14569 /* Copy last byte. */
14571 if ((remaining & 1) != 0)
14573 byte_tmp = gen_reg_rtx (SImode);
14575 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14576 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14577 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14579 if (interleave_factor == 1)
14581 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14582 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14583 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14584 byte_tmp = NULL;
14585 dstoffset++;
14588 remaining--;
14589 srcoffset++;
14592 /* Store last halfword if we haven't done so already. */
14594 if (halfword_tmp)
14596 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14597 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14598 emit_insn (gen_unaligned_storehi (mem,
14599 gen_lowpart (HImode, halfword_tmp)));
14600 dstoffset += 2;
14603 /* Likewise for last byte. */
14605 if (byte_tmp)
14607 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14608 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14609 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14610 dstoffset++;
14613 gcc_assert (remaining == 0 && srcoffset == dstoffset);
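/* Editorial sketch, not part of arm.c: the copy schedule used by
   arm_block_move_unaligned_straight, written as plain C.  Word-sized chunks
   of INTERLEAVE_FACTOR words are copied with all loads issued before the
   matching stores (to hide load latency), then leftover whole words, a
   trailing halfword and a final byte.  memcpy stands in for the
   unaligned_loadsi/storesi patterns the real code emits; names are
   hypothetical.  */

#include <string.h>
#include <stdint.h>

static void
demo_unaligned_copy (unsigned char *dst, const unsigned char *src,
                     long length, int interleave_factor)
{
  uint32_t regs[4];                 /* interleave_factor is between 1 and 4.  */
  long block = 4L * interleave_factor;
  long i, j;

  for (i = 0; i + block <= length; i += block)
    {
      for (j = 0; j < interleave_factor; j++)    /* All loads first ...  */
        memcpy (&regs[j], src + i + 4 * j, 4);
      for (j = 0; j < interleave_factor; j++)    /* ... then all stores.  */
        memcpy (dst + i + 4 * j, &regs[j], 4);
    }
  for (; i + 4 <= length; i += 4)                /* Remaining whole words.  */
    {
      memcpy (&regs[0], src + i, 4);
      memcpy (dst + i, &regs[0], 4);
    }
  if (length - i >= 2)                           /* Trailing halfword.  */
    {
      memcpy (dst + i, src + i, 2);
      i += 2;
    }
  if (length - i == 1)                           /* Trailing byte.  */
    dst[i] = src[i];
}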
14616 /* From mips_adjust_block_mem:
14618 Helper function for doing a loop-based block operation on memory
14619 reference MEM. Each iteration of the loop will operate on LENGTH
14620 bytes of MEM.
14622 Create a new base register for use within the loop and point it to
14623 the start of MEM. Create a new memory reference that uses this
14624 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14626 static void
14627 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14628 rtx *loop_mem)
14630 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14632 /* Although the new mem does not refer to a known location,
14633 it does keep up to LENGTH bytes of alignment. */
14634 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14635 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14638 /* From mips_block_move_loop:
14640 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14641 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14642 the memory regions do not overlap. */
14644 static void
14645 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14646 unsigned int interleave_factor,
14647 HOST_WIDE_INT bytes_per_iter)
14649 rtx src_reg, dest_reg, final_src, test;
14650 HOST_WIDE_INT leftover;
14652 leftover = length % bytes_per_iter;
14653 length -= leftover;
14655 /* Create registers and memory references for use within the loop. */
14656 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14657 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14659 /* Calculate the value that SRC_REG should have after the last iteration of
14660 the loop. */
14661 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14662 0, 0, OPTAB_WIDEN);
14664 /* Emit the start of the loop. */
14665 rtx_code_label *label = gen_label_rtx ();
14666 emit_label (label);
14668 /* Emit the loop body. */
14669 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14670 interleave_factor);
14672 /* Move on to the next block. */
14673 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14674 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14676 /* Emit the loop condition. */
14677 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14678 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14680 /* Mop up any left-over bytes. */
14681 if (leftover)
14682 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14685 /* Emit a block move when either the source or destination is unaligned (not
14686 aligned to a four-byte boundary). This may need further tuning depending on
14687 core type, optimize_size setting, etc. */
14689 static int
14690 arm_movmemqi_unaligned (rtx *operands)
14692 HOST_WIDE_INT length = INTVAL (operands[2]);
14694 if (optimize_size)
14696 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14697 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14698 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14699 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14700 or dst_aligned though: allow more interleaving in those cases since the
14701 resulting code can be smaller. */
14702 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14703 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14705 if (length > 12)
14706 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14707 interleave_factor, bytes_per_iter);
14708 else
14709 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14710 interleave_factor);
14712 else
14714 /* Note that the loop created by arm_block_move_unaligned_loop may be
14715 subject to loop unrolling, which makes tuning this condition a little
14716 redundant. */
14717 if (length > 32)
14718 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14719 else
14720 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14723 return 1;
14726 int
14727 arm_gen_movmemqi (rtx *operands)
14729 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14730 HOST_WIDE_INT srcoffset, dstoffset;
14731 int i;
14732 rtx src, dst, srcbase, dstbase;
14733 rtx part_bytes_reg = NULL;
14734 rtx mem;
14736 if (!CONST_INT_P (operands[2])
14737 || !CONST_INT_P (operands[3])
14738 || INTVAL (operands[2]) > 64)
14739 return 0;
14741 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14742 return arm_movmemqi_unaligned (operands);
14744 if (INTVAL (operands[3]) & 3)
14745 return 0;
14747 dstbase = operands[0];
14748 srcbase = operands[1];
14750 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14751 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14753 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14754 out_words_to_go = INTVAL (operands[2]) / 4;
14755 last_bytes = INTVAL (operands[2]) & 3;
14756 dstoffset = srcoffset = 0;
14758 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14759 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14761 for (i = 0; in_words_to_go >= 2; i+=4)
14763 if (in_words_to_go > 4)
14764 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14765 TRUE, srcbase, &srcoffset));
14766 else
14767 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14768 src, FALSE, srcbase,
14769 &srcoffset));
14771 if (out_words_to_go)
14773 if (out_words_to_go > 4)
14774 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14775 TRUE, dstbase, &dstoffset));
14776 else if (out_words_to_go != 1)
14777 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14778 out_words_to_go, dst,
14779 (last_bytes == 0
14780 ? FALSE : TRUE),
14781 dstbase, &dstoffset));
14782 else
14784 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14785 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14786 if (last_bytes != 0)
14788 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14789 dstoffset += 4;
14794 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14795 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14798 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14799 if (out_words_to_go)
14801 rtx sreg;
14803 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14804 sreg = copy_to_reg (mem);
14806 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14807 emit_move_insn (mem, sreg);
14808 in_words_to_go--;
14810 gcc_assert (!in_words_to_go); /* Sanity check */
14813 if (in_words_to_go)
14815 gcc_assert (in_words_to_go > 0);
14817 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14818 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14821 gcc_assert (!last_bytes || part_bytes_reg);
14823 if (BYTES_BIG_ENDIAN && last_bytes)
14825 rtx tmp = gen_reg_rtx (SImode);
14827 /* The bytes we want are in the top end of the word. */
14828 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14829 GEN_INT (8 * (4 - last_bytes))));
14830 part_bytes_reg = tmp;
14832 while (last_bytes)
14834 mem = adjust_automodify_address (dstbase, QImode,
14835 plus_constant (Pmode, dst,
14836 last_bytes - 1),
14837 dstoffset + last_bytes - 1);
14838 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14840 if (--last_bytes)
14842 tmp = gen_reg_rtx (SImode);
14843 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14844 part_bytes_reg = tmp;
14849 else
14851 if (last_bytes > 1)
14853 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14854 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14855 last_bytes -= 2;
14856 if (last_bytes)
14858 rtx tmp = gen_reg_rtx (SImode);
14859 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14860 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14861 part_bytes_reg = tmp;
14862 dstoffset += 2;
14866 if (last_bytes)
14868 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14869 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14873 return 1;
14876 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14877 by mode size. */
14878 inline static rtx
14879 next_consecutive_mem (rtx mem)
14881 machine_mode mode = GET_MODE (mem);
14882 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14883 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14885 return adjust_automodify_address (mem, mode, addr, offset);
14888 /* Copy using LDRD/STRD instructions whenever possible.
14889 Returns true upon success. */
14890 bool
14891 gen_movmem_ldrd_strd (rtx *operands)
14893 unsigned HOST_WIDE_INT len;
14894 HOST_WIDE_INT align;
14895 rtx src, dst, base;
14896 rtx reg0;
14897 bool src_aligned, dst_aligned;
14898 bool src_volatile, dst_volatile;
14900 gcc_assert (CONST_INT_P (operands[2]));
14901 gcc_assert (CONST_INT_P (operands[3]));
14903 len = UINTVAL (operands[2]);
14904 if (len > 64)
14905 return false;
14907 /* Maximum alignment we can assume for both src and dst buffers. */
14908 align = INTVAL (operands[3]);
14910 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14911 return false;
14913 /* Place src and dst addresses in registers
14914 and update the corresponding mem rtx. */
14915 dst = operands[0];
14916 dst_volatile = MEM_VOLATILE_P (dst);
14917 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14918 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14919 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14921 src = operands[1];
14922 src_volatile = MEM_VOLATILE_P (src);
14923 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14924 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14925 src = adjust_automodify_address (src, VOIDmode, base, 0);
14927 if (!unaligned_access && !(src_aligned && dst_aligned))
14928 return false;
14930 if (src_volatile || dst_volatile)
14931 return false;
14933 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14934 if (!(dst_aligned || src_aligned))
14935 return arm_gen_movmemqi (operands);
14937 src = adjust_address (src, DImode, 0);
14938 dst = adjust_address (dst, DImode, 0);
14939 while (len >= 8)
14941 len -= 8;
14942 reg0 = gen_reg_rtx (DImode);
14943 if (src_aligned)
14944 emit_move_insn (reg0, src);
14945 else
14946 emit_insn (gen_unaligned_loaddi (reg0, src));
14948 if (dst_aligned)
14949 emit_move_insn (dst, reg0);
14950 else
14951 emit_insn (gen_unaligned_storedi (dst, reg0));
14953 src = next_consecutive_mem (src);
14954 dst = next_consecutive_mem (dst);
14957 gcc_assert (len < 8);
14958 if (len >= 4)
14960 /* More than a word but less than a double-word to copy. Copy a word. */
14961 reg0 = gen_reg_rtx (SImode);
14962 src = adjust_address (src, SImode, 0);
14963 dst = adjust_address (dst, SImode, 0);
14964 if (src_aligned)
14965 emit_move_insn (reg0, src);
14966 else
14967 emit_insn (gen_unaligned_loadsi (reg0, src));
14969 if (dst_aligned)
14970 emit_move_insn (dst, reg0);
14971 else
14972 emit_insn (gen_unaligned_storesi (dst, reg0));
14974 src = next_consecutive_mem (src);
14975 dst = next_consecutive_mem (dst);
14976 len -= 4;
14979 if (len == 0)
14980 return true;
14982 /* Copy the remaining bytes. */
14983 if (len >= 2)
14985 dst = adjust_address (dst, HImode, 0);
14986 src = adjust_address (src, HImode, 0);
14987 reg0 = gen_reg_rtx (SImode);
14988 if (src_aligned)
14989 emit_insn (gen_zero_extendhisi2 (reg0, src));
14990 else
14991 emit_insn (gen_unaligned_loadhiu (reg0, src));
14993 if (dst_aligned)
14994 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14995 else
14996 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14998 src = next_consecutive_mem (src);
14999 dst = next_consecutive_mem (dst);
15000 if (len == 2)
15001 return true;
15004 dst = adjust_address (dst, QImode, 0);
15005 src = adjust_address (src, QImode, 0);
15006 reg0 = gen_reg_rtx (QImode);
15007 emit_move_insn (reg0, src);
15008 emit_move_insn (dst, reg0);
15009 return true;
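/* Editorial sketch, not part of arm.c: the size breakdown used by
   gen_movmem_ldrd_strd.  The bulk of the copy moves in 8-byte (ldrd/strd)
   steps, then at most one word, one halfword and one byte finish the tail.
   Plain C stand-in only; the real code emits aligned moves or the
   unaligned_load/store patterns as appropriate.  */

#include <string.h>

static void
demo_ldrd_strd_copy (unsigned char *dst, const unsigned char *src,
                     unsigned long len)
{
  while (len >= 8)                   /* ldrd/strd-sized chunks.  */
    {
      memcpy (dst, src, 8);
      dst += 8, src += 8, len -= 8;
    }
  if (len >= 4)                      /* One word (ldr/str).  */
    {
      memcpy (dst, src, 4);
      dst += 4, src += 4, len -= 4;
    }
  if (len >= 2)                      /* One halfword (ldrh/strh).  */
    {
      memcpy (dst, src, 2);
      dst += 2, src += 2, len -= 2;
    }
  if (len == 1)                      /* Final byte (ldrb/strb).  */
    *dst = *src;
}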
15012 /* Select a dominance comparison mode if possible for a test of the general
15013 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15014 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15015 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15016 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15017 In all cases OP will be either EQ or NE, but we don't need to know which
15018 here. If we are unable to support a dominance comparison we return
15019 CC mode. This will then fail to match for the RTL expressions that
15020 generate this call. */
15021 machine_mode
15022 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15024 enum rtx_code cond1, cond2;
15025 int swapped = 0;
15027 /* Currently we will probably get the wrong result if the individual
15028 comparisons are not simple. This also ensures that it is safe to
15029 reverse a comparison if necessary. */
15030 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15031 != CCmode)
15032 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15033 != CCmode))
15034 return CCmode;
15036 /* The if_then_else variant of this tests the second condition if the
15037 first passes, but is true if the first fails. Reverse the first
15038 condition to get a true "inclusive-or" expression. */
15039 if (cond_or == DOM_CC_NX_OR_Y)
15040 cond1 = reverse_condition (cond1);
15042 /* If the comparisons are not equal, and one doesn't dominate the other,
15043 then we can't do this. */
15044 if (cond1 != cond2
15045 && !comparison_dominates_p (cond1, cond2)
15046 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15047 return CCmode;
15049 if (swapped)
15050 std::swap (cond1, cond2);
15052 switch (cond1)
15054 case EQ:
15055 if (cond_or == DOM_CC_X_AND_Y)
15056 return CC_DEQmode;
15058 switch (cond2)
15060 case EQ: return CC_DEQmode;
15061 case LE: return CC_DLEmode;
15062 case LEU: return CC_DLEUmode;
15063 case GE: return CC_DGEmode;
15064 case GEU: return CC_DGEUmode;
15065 default: gcc_unreachable ();
15068 case LT:
15069 if (cond_or == DOM_CC_X_AND_Y)
15070 return CC_DLTmode;
15072 switch (cond2)
15074 case LT:
15075 return CC_DLTmode;
15076 case LE:
15077 return CC_DLEmode;
15078 case NE:
15079 return CC_DNEmode;
15080 default:
15081 gcc_unreachable ();
15084 case GT:
15085 if (cond_or == DOM_CC_X_AND_Y)
15086 return CC_DGTmode;
15088 switch (cond2)
15090 case GT:
15091 return CC_DGTmode;
15092 case GE:
15093 return CC_DGEmode;
15094 case NE:
15095 return CC_DNEmode;
15096 default:
15097 gcc_unreachable ();
15100 case LTU:
15101 if (cond_or == DOM_CC_X_AND_Y)
15102 return CC_DLTUmode;
15104 switch (cond2)
15106 case LTU:
15107 return CC_DLTUmode;
15108 case LEU:
15109 return CC_DLEUmode;
15110 case NE:
15111 return CC_DNEmode;
15112 default:
15113 gcc_unreachable ();
15116 case GTU:
15117 if (cond_or == DOM_CC_X_AND_Y)
15118 return CC_DGTUmode;
15120 switch (cond2)
15122 case GTU:
15123 return CC_DGTUmode;
15124 case GEU:
15125 return CC_DGEUmode;
15126 case NE:
15127 return CC_DNEmode;
15128 default:
15129 gcc_unreachable ();
15132 /* The remaining cases only occur when both comparisons are the
15133 same. */
15134 case NE:
15135 gcc_assert (cond1 == cond2);
15136 return CC_DNEmode;
15138 case LE:
15139 gcc_assert (cond1 == cond2);
15140 return CC_DLEmode;
15142 case GE:
15143 gcc_assert (cond1 == cond2);
15144 return CC_DGEmode;
15146 case LEU:
15147 gcc_assert (cond1 == cond2);
15148 return CC_DLEUmode;
15150 case GEU:
15151 gcc_assert (cond1 == cond2);
15152 return CC_DGEUmode;
15154 default:
15155 gcc_unreachable ();
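/* Editorial example, not part of arm.c: the kind of source condition the
   dominance machinery targets -- two simple comparisons joined by && or ||,
   so a single flag-setting sequence can cover both.  Whether combine
   actually forms the corresponding RTL depends on context; illustrative
   only.  */

static int
demo_both_zero (int x, int y)
{
  /* (EQ && EQ) with DOM_CC_X_AND_Y selects CC_DEQmode above.  */
  return x == 0 && y == 0;
}

static int
demo_either_negative (int x, int y)
{
  /* (LT || LT) with DOM_CC_X_OR_Y selects CC_DLTmode above.  */
  return x < 0 || y < 0;
}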
15159 machine_mode
15160 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15162 /* All floating point compares return CCFP if it is an equality
15163 comparison, and CCFPE otherwise. */
15164 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15166 switch (op)
15168 case EQ:
15169 case NE:
15170 case UNORDERED:
15171 case ORDERED:
15172 case UNLT:
15173 case UNLE:
15174 case UNGT:
15175 case UNGE:
15176 case UNEQ:
15177 case LTGT:
15178 return CCFPmode;
15180 case LT:
15181 case LE:
15182 case GT:
15183 case GE:
15184 return CCFPEmode;
15186 default:
15187 gcc_unreachable ();
15191 /* A compare with a shifted operand. Because of canonicalization, the
15192 comparison will have to be swapped when we emit the assembler. */
15193 if (GET_MODE (y) == SImode
15194 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15195 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15196 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15197 || GET_CODE (x) == ROTATERT))
15198 return CC_SWPmode;
15200 /* This operation is performed swapped, but since we only rely on the Z
15201 flag we don't need an additional mode. */
15202 if (GET_MODE (y) == SImode
15203 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15204 && GET_CODE (x) == NEG
15205 && (op == EQ || op == NE))
15206 return CC_Zmode;
15208 /* This is a special case that is used by combine to allow a
15209 comparison of a shifted byte load to be split into a zero-extend
15210 followed by a comparison of the shifted integer (only valid for
15211 equalities and unsigned inequalities). */
15212 if (GET_MODE (x) == SImode
15213 && GET_CODE (x) == ASHIFT
15214 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15215 && GET_CODE (XEXP (x, 0)) == SUBREG
15216 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15217 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15218 && (op == EQ || op == NE
15219 || op == GEU || op == GTU || op == LTU || op == LEU)
15220 && CONST_INT_P (y))
15221 return CC_Zmode;
15223 /* A construct for a conditional compare, if the false arm contains
15224 0, then both conditions must be true, otherwise either condition
15225 must be true. Not all conditions are possible, so CCmode is
15226 returned if it can't be done. */
15227 if (GET_CODE (x) == IF_THEN_ELSE
15228 && (XEXP (x, 2) == const0_rtx
15229 || XEXP (x, 2) == const1_rtx)
15230 && COMPARISON_P (XEXP (x, 0))
15231 && COMPARISON_P (XEXP (x, 1)))
15232 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15233 INTVAL (XEXP (x, 2)));
15235 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15236 if (GET_CODE (x) == AND
15237 && (op == EQ || op == NE)
15238 && COMPARISON_P (XEXP (x, 0))
15239 && COMPARISON_P (XEXP (x, 1)))
15240 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15241 DOM_CC_X_AND_Y);
15243 if (GET_CODE (x) == IOR
15244 && (op == EQ || op == NE)
15245 && COMPARISON_P (XEXP (x, 0))
15246 && COMPARISON_P (XEXP (x, 1)))
15247 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15248 DOM_CC_X_OR_Y);
15250 /* An operation (on Thumb) where we want to test for a single bit.
15251 This is done by shifting that bit up into the top bit of a
15252 scratch register; we can then branch on the sign bit. */
15253 if (TARGET_THUMB1
15254 && GET_MODE (x) == SImode
15255 && (op == EQ || op == NE)
15256 && GET_CODE (x) == ZERO_EXTRACT
15257 && XEXP (x, 1) == const1_rtx)
15258 return CC_Nmode;
15260 /* An operation that sets the condition codes as a side-effect, the
15261 V flag is not set correctly, so we can only use comparisons where
15262 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15263 instead.) */
15264 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15265 if (GET_MODE (x) == SImode
15266 && y == const0_rtx
15267 && (op == EQ || op == NE || op == LT || op == GE)
15268 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15269 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15270 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15271 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15272 || GET_CODE (x) == LSHIFTRT
15273 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15274 || GET_CODE (x) == ROTATERT
15275 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15276 return CC_NOOVmode;
15278 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15279 return CC_Zmode;
15281 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15282 && GET_CODE (x) == PLUS
15283 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15284 return CC_Cmode;
15286 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15288 switch (op)
15290 case EQ:
15291 case NE:
15292 /* A DImode comparison against zero can be implemented by
15293 or'ing the two halves together. */
15294 if (y == const0_rtx)
15295 return CC_Zmode;
15297 /* We can do an equality test in three Thumb instructions. */
15298 if (!TARGET_32BIT)
15299 return CC_Zmode;
15301 /* FALLTHROUGH */
15303 case LTU:
15304 case LEU:
15305 case GTU:
15306 case GEU:
15307 /* DImode unsigned comparisons can be implemented by cmp +
15308 cmpeq without a scratch register. Not worth doing in
15309 Thumb-2. */
15310 if (TARGET_32BIT)
15311 return CC_CZmode;
15313 /* FALLTHROUGH */
15315 case LT:
15316 case LE:
15317 case GT:
15318 case GE:
15319 /* DImode signed and unsigned comparisons can be implemented
15320 by cmp + sbcs with a scratch register, but that does not
15321 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15322 gcc_assert (op != EQ && op != NE);
15323 return CC_NCVmode;
15325 default:
15326 gcc_unreachable ();
15330 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15331 return GET_MODE (x);
15333 return CCmode;
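/* Editorial example, not part of arm.c: source-level idioms whose
   comparisons can land in the special CC modes chosen above.  Illustrative
   only; the mode finally used depends on the RTL that combine presents.  */

#include <stdbool.h>

/* (LTU (plus a b) a) -- an unsigned overflow check; a candidate for
   CC_Cmode, where only the carry flag is meaningful.  */
static bool
demo_add_overflows (unsigned a, unsigned b)
{
  return a + b < a;
}

/* An AND result compared against zero; a candidate for CC_NOOVmode, where
   Z and N are valid but the V flag is not.  */
static bool
demo_any_bits_set (unsigned x, unsigned mask)
{
  return (x & mask) != 0;
}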
15336 /* X and Y are two things to compare using CODE. Emit the compare insn and
15337 return the rtx for the CC register in the proper mode. SCRATCH is a
15338 scratch register that may be needed for some DImode comparisons. */
15339 rtx
15340 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15342 machine_mode mode;
15343 rtx cc_reg;
15344 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15346 /* We might have X as a constant, Y as a register because of the predicates
15347 used for cmpdi. If so, force X to a register here. */
15348 if (dimode_comparison && !REG_P (x))
15349 x = force_reg (DImode, x);
15351 mode = SELECT_CC_MODE (code, x, y);
15352 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15354 if (dimode_comparison
15355 && mode != CC_CZmode)
15357 rtx clobber, set;
15359 /* To compare two non-zero values for equality, XOR them and
15360 then compare against zero. Not used for ARM mode; there
15361 CC_CZmode is cheaper. */
15362 if (mode == CC_Zmode && y != const0_rtx)
15364 gcc_assert (!reload_completed);
15365 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15366 y = const0_rtx;
15369 /* A scratch register is required. */
15370 if (reload_completed)
15371 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15372 else
15373 scratch = gen_rtx_SCRATCH (SImode);
15375 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15376 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15377 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15379 else
15380 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15382 return cc_reg;
15385 /* Generate a sequence of insns that will generate the correct return
15386 address mask depending on the physical architecture that the program
15387 is running on. */
15388 rtx
15389 arm_gen_return_addr_mask (void)
15391 rtx reg = gen_reg_rtx (Pmode);
15393 emit_insn (gen_return_addr_mask (reg));
15394 return reg;
15397 void
15398 arm_reload_in_hi (rtx *operands)
15400 rtx ref = operands[1];
15401 rtx base, scratch;
15402 HOST_WIDE_INT offset = 0;
15404 if (GET_CODE (ref) == SUBREG)
15406 offset = SUBREG_BYTE (ref);
15407 ref = SUBREG_REG (ref);
15410 if (REG_P (ref))
15412 /* We have a pseudo which has been spilt onto the stack; there
15413 are two cases here: the first where there is a simple
15414 stack-slot replacement and a second where the stack-slot is
15415 out of range, or is used as a subreg. */
15416 if (reg_equiv_mem (REGNO (ref)))
15418 ref = reg_equiv_mem (REGNO (ref));
15419 base = find_replacement (&XEXP (ref, 0));
15421 else
15422 /* The slot is out of range, or was dressed up in a SUBREG. */
15423 base = reg_equiv_address (REGNO (ref));
15425 else
15426 base = find_replacement (&XEXP (ref, 0));
15428 /* Handle the case where the address is too complex to be offset by 1. */
15429 if (GET_CODE (base) == MINUS
15430 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15432 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15434 emit_set_insn (base_plus, base);
15435 base = base_plus;
15437 else if (GET_CODE (base) == PLUS)
15439 /* The addend must be CONST_INT, or we would have dealt with it above. */
15440 HOST_WIDE_INT hi, lo;
15442 offset += INTVAL (XEXP (base, 1));
15443 base = XEXP (base, 0);
15445 /* Rework the address into a legal sequence of insns. */
15446 /* Valid range for lo is -4095 -> 4095 */
15447 lo = (offset >= 0
15448 ? (offset & 0xfff)
15449 : -((-offset) & 0xfff));
15451 /* Corner case, if lo is the max offset then we would be out of range
15452 once we have added the additional 1 below, so bump the msb into the
15453 pre-loading insn(s). */
15454 if (lo == 4095)
15455 lo &= 0x7ff;
15457 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15458 ^ (HOST_WIDE_INT) 0x80000000)
15459 - (HOST_WIDE_INT) 0x80000000);
15461 gcc_assert (hi + lo == offset);
15463 if (hi != 0)
15465 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15467 /* Get the base address; addsi3 knows how to handle constants
15468 that require more than one insn. */
15469 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15470 base = base_plus;
15471 offset = lo;
15475 /* Operands[2] may overlap operands[0] (though it won't overlap
15476 operands[1]), that's why we asked for a DImode reg -- so we can
15477 use the bit that does not overlap. */
15478 if (REGNO (operands[2]) == REGNO (operands[0]))
15479 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15480 else
15481 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15483 emit_insn (gen_zero_extendqisi2 (scratch,
15484 gen_rtx_MEM (QImode,
15485 plus_constant (Pmode, base,
15486 offset))));
15487 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15488 gen_rtx_MEM (QImode,
15489 plus_constant (Pmode, base,
15490 offset + 1))));
15491 if (!BYTES_BIG_ENDIAN)
15492 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15493 gen_rtx_IOR (SImode,
15494 gen_rtx_ASHIFT
15495 (SImode,
15496 gen_rtx_SUBREG (SImode, operands[0], 0),
15497 GEN_INT (8)),
15498 scratch));
15499 else
15500 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15501 gen_rtx_IOR (SImode,
15502 gen_rtx_ASHIFT (SImode, scratch,
15503 GEN_INT (8)),
15504 gen_rtx_SUBREG (SImode, operands[0], 0)));
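/* Worked example of the offset splitting above (editorial sketch, numbers
   made up): for offset = 0x1234 the code computes lo = 0x234 and hi = 0x1000,
   so the reload becomes roughly

       add  base_plus, base, #0x1000     @ emitted via addsi3
       ldrb scratch, [base_plus, #0x234]
       ldrb ...,     [base_plus, #0x235]

   with hi + lo == offset as asserted.  When lo would be 4095 it is first
   reduced to 0x7ff so that the "offset + 1" access stays within the
   4095-byte range of a byte load.  */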
15507 /* Handle storing a half-word to memory during reload by synthesizing it as two
15508 byte stores. Take care not to clobber the input values until after we
15509 have moved them somewhere safe. This code assumes that if the DImode
15510 scratch in operands[2] overlaps either the input value or output address
15511 in some way, then that value must die in this insn (we absolutely need
15512 two scratch registers for some corner cases). */
15513 void
15514 arm_reload_out_hi (rtx *operands)
15516 rtx ref = operands[0];
15517 rtx outval = operands[1];
15518 rtx base, scratch;
15519 HOST_WIDE_INT offset = 0;
15521 if (GET_CODE (ref) == SUBREG)
15523 offset = SUBREG_BYTE (ref);
15524 ref = SUBREG_REG (ref);
15527 if (REG_P (ref))
15529 /* We have a pseudo which has been spilt onto the stack; there
15530 are two cases here: the first where there is a simple
15531 stack-slot replacement and a second where the stack-slot is
15532 out of range, or is used as a subreg. */
15533 if (reg_equiv_mem (REGNO (ref)))
15535 ref = reg_equiv_mem (REGNO (ref));
15536 base = find_replacement (&XEXP (ref, 0));
15538 else
15539 /* The slot is out of range, or was dressed up in a SUBREG. */
15540 base = reg_equiv_address (REGNO (ref));
15542 else
15543 base = find_replacement (&XEXP (ref, 0));
15545 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15547 /* Handle the case where the address is too complex to be offset by 1. */
15548 if (GET_CODE (base) == MINUS
15549 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15551 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15553 /* Be careful not to destroy OUTVAL. */
15554 if (reg_overlap_mentioned_p (base_plus, outval))
15556 /* Updating base_plus might destroy outval, see if we can
15557 swap the scratch and base_plus. */
15558 if (!reg_overlap_mentioned_p (scratch, outval))
15559 std::swap (scratch, base_plus);
15560 else
15562 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15564 /* Be conservative and copy OUTVAL into the scratch now,
15565 this should only be necessary if outval is a subreg
15566 of something larger than a word. */
15567 /* XXX Might this clobber base? I can't see how it can,
15568 since scratch is known to overlap with OUTVAL, and
15569 must be wider than a word. */
15570 emit_insn (gen_movhi (scratch_hi, outval));
15571 outval = scratch_hi;
15575 emit_set_insn (base_plus, base);
15576 base = base_plus;
15578 else if (GET_CODE (base) == PLUS)
15580 /* The addend must be CONST_INT, or we would have dealt with it above. */
15581 HOST_WIDE_INT hi, lo;
15583 offset += INTVAL (XEXP (base, 1));
15584 base = XEXP (base, 0);
15586 /* Rework the address into a legal sequence of insns. */
15587 /* Valid range for lo is -4095 -> 4095 */
15588 lo = (offset >= 0
15589 ? (offset & 0xfff)
15590 : -((-offset) & 0xfff));
15592 /* Corner case, if lo is the max offset then we would be out of range
15593 once we have added the additional 1 below, so bump the msb into the
15594 pre-loading insn(s). */
15595 if (lo == 4095)
15596 lo &= 0x7ff;
15598 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15599 ^ (HOST_WIDE_INT) 0x80000000)
15600 - (HOST_WIDE_INT) 0x80000000);
15602 gcc_assert (hi + lo == offset);
15604 if (hi != 0)
15606 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15608 /* Be careful not to destroy OUTVAL. */
15609 if (reg_overlap_mentioned_p (base_plus, outval))
15611 /* Updating base_plus might destroy outval, see if we
15612 can swap the scratch and base_plus. */
15613 if (!reg_overlap_mentioned_p (scratch, outval))
15614 std::swap (scratch, base_plus);
15615 else
15617 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15619 /* Be conservative and copy outval into scratch now,
15620 this should only be necessary if outval is a
15621 subreg of something larger than a word. */
15622 /* XXX Might this clobber base? I can't see how it
15623 can, since scratch is known to overlap with
15624 outval. */
15625 emit_insn (gen_movhi (scratch_hi, outval));
15626 outval = scratch_hi;
15630 /* Get the base address; addsi3 knows how to handle constants
15631 that require more than one insn. */
15632 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15633 base = base_plus;
15634 offset = lo;
15638 if (BYTES_BIG_ENDIAN)
15640 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15641 plus_constant (Pmode, base,
15642 offset + 1)),
15643 gen_lowpart (QImode, outval)));
15644 emit_insn (gen_lshrsi3 (scratch,
15645 gen_rtx_SUBREG (SImode, outval, 0),
15646 GEN_INT (8)));
15647 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15648 offset)),
15649 gen_lowpart (QImode, scratch)));
15651 else
15653 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15654 offset)),
15655 gen_lowpart (QImode, outval)));
15656 emit_insn (gen_lshrsi3 (scratch,
15657 gen_rtx_SUBREG (SImode, outval, 0),
15658 GEN_INT (8)));
15659 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15660 plus_constant (Pmode, base,
15661 offset + 1)),
15662 gen_lowpart (QImode, scratch)));
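/* Editorial sketch of the synthesized store on a little-endian target
   (register names made up): storing the half-word OUTVAL to [base, #offset]
   expands to

       strb outval,  [base, #offset]       @ low byte
       lsr  scratch, outval, #8
       strb scratch, [base, #offset + 1]   @ high byte

   On a big-endian target the low byte goes to offset + 1 and the shifted
   byte to offset, matching the BYTES_BIG_ENDIAN branch above.  */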
15666 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15667 (padded to the size of a word) should be passed in a register. */
15669 static bool
15670 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15672 if (TARGET_AAPCS_BASED)
15673 return must_pass_in_stack_var_size (mode, type);
15674 else
15675 return must_pass_in_stack_var_size_or_pad (mode, type);
15679 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15680 Return true if an argument passed on the stack should be padded upwards,
15681 i.e. if the least-significant byte has useful data.
15682 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15683 aggregate types are placed in the lowest memory address. */
15685 bool
15686 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15688 if (!TARGET_AAPCS_BASED)
15689 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15691 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15692 return false;
15694 return true;
15698 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15699 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15700 register has useful data, and return the opposite if the most
15701 significant byte does. */
15703 bool
15704 arm_pad_reg_upward (machine_mode mode,
15705 tree type, int first ATTRIBUTE_UNUSED)
15707 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15709 /* For AAPCS, small aggregates, small fixed-point types,
15710 and small complex types are always padded upwards. */
15711 if (type)
15713 if ((AGGREGATE_TYPE_P (type)
15714 || TREE_CODE (type) == COMPLEX_TYPE
15715 || FIXED_POINT_TYPE_P (type))
15716 && int_size_in_bytes (type) <= 4)
15717 return true;
15719 else
15721 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15722 && GET_MODE_SIZE (mode) <= 4)
15723 return true;
15727 /* Otherwise, use default padding. */
15728 return !BYTES_BIG_ENDIAN;
15731 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15732 assuming that the address in the base register is word aligned. */
15733 bool
15734 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15736 HOST_WIDE_INT max_offset;
15738 /* Offset must be a multiple of 4 in Thumb mode. */
15739 if (TARGET_THUMB2 && ((offset & 3) != 0))
15740 return false;
15742 if (TARGET_THUMB2)
15743 max_offset = 1020;
15744 else if (TARGET_ARM)
15745 max_offset = 255;
15746 else
15747 return false;
15749 return ((offset <= max_offset) && (offset >= -max_offset));
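/* Illustrative values (editorial note): in Thumb-2 state,
   offset_ok_for_ldrd_strd (1020) and offset_ok_for_ldrd_strd (-1020) are
   true, while offset_ok_for_ldrd_strd (6) is false because the offset is
   not a multiple of four; in ARM state offsets up to +/-255 are accepted
   and the multiple-of-four restriction does not apply.  */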
15752 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15753 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15754 Assumes that the address in the base register RN is word aligned. Pattern
15755 guarantees that both memory accesses use the same base register,
15756 the offsets are constants within range, and the gap between the offsets is 4.
15757 If reload is complete, check that the registers are legal. WBACK indicates whether
15758 the address is updated. LOAD indicates whether the memory access is a load or a store. */
15759 bool
15760 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15761 bool wback, bool load)
15763 unsigned int t, t2, n;
15765 if (!reload_completed)
15766 return true;
15768 if (!offset_ok_for_ldrd_strd (offset))
15769 return false;
15771 t = REGNO (rt);
15772 t2 = REGNO (rt2);
15773 n = REGNO (rn);
15775 if ((TARGET_THUMB2)
15776 && ((wback && (n == t || n == t2))
15777 || (t == SP_REGNUM)
15778 || (t == PC_REGNUM)
15779 || (t2 == SP_REGNUM)
15780 || (t2 == PC_REGNUM)
15781 || (!load && (n == PC_REGNUM))
15782 || (load && (t == t2))
15783 /* Triggers Cortex-M3 LDRD errata. */
15784 || (!wback && load && fix_cm3_ldrd && (n == t))))
15785 return false;
15787 if ((TARGET_ARM)
15788 && ((wback && (n == t || n == t2))
15789 || (t2 == PC_REGNUM)
15790 || (t % 2 != 0) /* First destination register is not even. */
15791 || (t2 != t + 1)
15792 /* PC can be used as base register (for offset addressing only),
15793 but it is deprecated. */
15794 || (n == PC_REGNUM)))
15795 return false;
15797 return true;
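/* For example (editorial note): after reload in ARM state the pair
   {r0, r1} with an in-range offset is accepted, whereas {r1, r2} is
   rejected because the first destination register must be even and the
   second must be the next consecutive register; the PC is likewise
   rejected both as a destination and as the base register.  */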
15800 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15801 operand MEM's address contains an immediate offset from the base
15802 register and has no side effects, in which case it sets BASE and
15803 OFFSET accordingly. */
15804 static bool
15805 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15807 rtx addr;
15809 gcc_assert (base != NULL && offset != NULL);
15811 /* TODO: Handle more general memory operand patterns, such as
15812 PRE_DEC and PRE_INC. */
15814 if (side_effects_p (mem))
15815 return false;
15817 /* Can't deal with subregs. */
15818 if (GET_CODE (mem) == SUBREG)
15819 return false;
15821 gcc_assert (MEM_P (mem));
15823 *offset = const0_rtx;
15825 addr = XEXP (mem, 0);
15827 /* If addr isn't valid for DImode, then we can't handle it. */
15828 if (!arm_legitimate_address_p (DImode, addr,
15829 reload_in_progress || reload_completed))
15830 return false;
15832 if (REG_P (addr))
15834 *base = addr;
15835 return true;
15837 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15839 *base = XEXP (addr, 0);
15840 *offset = XEXP (addr, 1);
15841 return (REG_P (*base) && CONST_INT_P (*offset));
15844 return false;
15847 /* Called from a peephole2 to replace two word-size accesses with a
15848 single LDRD/STRD instruction. Returns true iff we can generate a
15849 new instruction sequence. That is, both accesses use the same base
15850 register and the gap between constant offsets is 4. This function
15851 may reorder its operands to match ldrd/strd RTL templates.
15852 OPERANDS are the operands found by the peephole matcher;
15853 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15854 corresponding memory operands. LOAD indicates whether the access
15855 is a load or a store. CONST_STORE indicates a store of constant
15856 integer values held in OPERANDS[4,5] and assumes that the pattern
15857 is four insns long, for the purpose of checking dead registers.
15858 COMMUTE indicates that register operands may be reordered. */
15859 bool
15860 gen_operands_ldrd_strd (rtx *operands, bool load,
15861 bool const_store, bool commute)
15863 int nops = 2;
15864 HOST_WIDE_INT offsets[2], offset;
15865 rtx base = NULL_RTX;
15866 rtx cur_base, cur_offset, tmp;
15867 int i, gap;
15868 HARD_REG_SET regset;
15870 gcc_assert (!const_store || !load);
15871 /* Check that the memory references are immediate offsets from the
15872 same base register. Extract the base register, the destination
15873 registers, and the corresponding memory offsets. */
15874 for (i = 0; i < nops; i++)
15876 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15877 return false;
15879 if (i == 0)
15880 base = cur_base;
15881 else if (REGNO (base) != REGNO (cur_base))
15882 return false;
15884 offsets[i] = INTVAL (cur_offset);
15885 if (GET_CODE (operands[i]) == SUBREG)
15887 tmp = SUBREG_REG (operands[i]);
15888 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15889 operands[i] = tmp;
15893 /* Make sure there is no dependency between the individual loads. */
15894 if (load && REGNO (operands[0]) == REGNO (base))
15895 return false; /* RAW */
15897 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15898 return false; /* WAW */
15900 /* If the same input register is used in both stores
15901 when storing different constants, try to find a free register.
15902 For example, the code
15903 mov r0, 0
15904 str r0, [r2]
15905 mov r0, 1
15906 str r0, [r2, #4]
15907 can be transformed into
15908 mov r1, 0
15909 strd r1, r0, [r2]
15910 in Thumb mode assuming that r1 is free. */
15911 if (const_store
15912 && REGNO (operands[0]) == REGNO (operands[1])
15913 && INTVAL (operands[4]) != INTVAL (operands[5]))
15915 if (TARGET_THUMB2)
15917 CLEAR_HARD_REG_SET (regset);
15918 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15919 if (tmp == NULL_RTX)
15920 return false;
15922 /* Use the new register in the first load to ensure that
15923 if the original input register is not dead after peephole,
15924 then it will have the correct constant value. */
15925 operands[0] = tmp;
15927 else if (TARGET_ARM)
15929 return false;
15930 int regno = REGNO (operands[0]);
15931 if (!peep2_reg_dead_p (4, operands[0]))
15933 /* When the input register is even and is not dead after the
15934 pattern, it has to hold the second constant but we cannot
15935 form a legal STRD in ARM mode with this register as the second
15936 register. */
15937 if (regno % 2 == 0)
15938 return false;
15940 /* Is regno-1 free? */
15941 SET_HARD_REG_SET (regset);
15942 CLEAR_HARD_REG_BIT(regset, regno - 1);
15943 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15944 if (tmp == NULL_RTX)
15945 return false;
15947 operands[0] = tmp;
15949 else
15951 /* Find a DImode register. */
15952 CLEAR_HARD_REG_SET (regset);
15953 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15954 if (tmp != NULL_RTX)
15956 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15957 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15959 else
15961 /* Can we use the input register to form a DI register? */
15962 SET_HARD_REG_SET (regset);
15963 CLEAR_HARD_REG_BIT(regset,
15964 regno % 2 == 0 ? regno + 1 : regno - 1);
15965 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15966 if (tmp == NULL_RTX)
15967 return false;
15968 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15972 gcc_assert (operands[0] != NULL_RTX);
15973 gcc_assert (operands[1] != NULL_RTX);
15974 gcc_assert (REGNO (operands[0]) % 2 == 0);
15975 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15979 /* Make sure the instructions are ordered with lower memory access first. */
15980 if (offsets[0] > offsets[1])
15982 gap = offsets[0] - offsets[1];
15983 offset = offsets[1];
15985 /* Swap the instructions such that lower memory is accessed first. */
15986 std::swap (operands[0], operands[1]);
15987 std::swap (operands[2], operands[3]);
15988 if (const_store)
15989 std::swap (operands[4], operands[5]);
15991 else
15993 gap = offsets[1] - offsets[0];
15994 offset = offsets[0];
15997 /* Make sure accesses are to consecutive memory locations. */
15998 if (gap != 4)
15999 return false;
16001 /* Make sure we generate legal instructions. */
16002 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16003 false, load))
16004 return true;
16006 /* In Thumb state, where registers are almost unconstrained, there
16007 is little hope to fix it. */
16008 if (TARGET_THUMB2)
16009 return false;
16011 if (load && commute)
16013 /* Try reordering registers. */
16014 std::swap (operands[0], operands[1]);
16015 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16016 false, load))
16017 return true;
16020 if (const_store)
16022 /* If input registers are dead after this pattern, they can be
16023 reordered or replaced by other registers that are free in the
16024 current pattern. */
16025 if (!peep2_reg_dead_p (4, operands[0])
16026 || !peep2_reg_dead_p (4, operands[1]))
16027 return false;
16029 /* Try to reorder the input registers. */
16030 /* For example, the code
16031 mov r0, 0
16032 mov r1, 1
16033 str r1, [r2]
16034 str r0, [r2, #4]
16035 can be transformed into
16036 mov r1, 0
16037 mov r0, 1
16038 strd r0, [r2]
16040 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16041 false, false))
16043 std::swap (operands[0], operands[1]);
16044 return true;
16047 /* Try to find a free DI register. */
16048 CLEAR_HARD_REG_SET (regset);
16049 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16050 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16051 while (true)
16053 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16054 if (tmp == NULL_RTX)
16055 return false;
16057 /* DREG must be an even-numbered register in DImode.
16058 Split it into SI registers. */
16059 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16060 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16061 gcc_assert (operands[0] != NULL_RTX);
16062 gcc_assert (operands[1] != NULL_RTX);
16063 gcc_assert (REGNO (operands[0]) % 2 == 0);
16064 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16066 return (operands_ok_ldrd_strd (operands[0], operands[1],
16067 base, offset,
16068 false, load));
16072 return false;
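/* Editorial sketch of a successful match (register numbers made up):

       ldr r0, [r2]             ->      ldrd r0, r1, [r2]
       ldr r1, [r2, #4]

   The peephole only succeeds when both accesses share the base register,
   the constant offsets differ by exactly four, and the chosen registers
   satisfy operands_ok_ldrd_strd.  */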
16078 /* Print a symbolic form of X to the debug file, F. */
16079 static void
16080 arm_print_value (FILE *f, rtx x)
16082 switch (GET_CODE (x))
16084 case CONST_INT:
16085 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16086 return;
16088 case CONST_DOUBLE:
16089 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16090 return;
16092 case CONST_VECTOR:
16094 int i;
16096 fprintf (f, "<");
16097 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16099 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16100 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16101 fputc (',', f);
16103 fprintf (f, ">");
16105 return;
16107 case CONST_STRING:
16108 fprintf (f, "\"%s\"", XSTR (x, 0));
16109 return;
16111 case SYMBOL_REF:
16112 fprintf (f, "`%s'", XSTR (x, 0));
16113 return;
16115 case LABEL_REF:
16116 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16117 return;
16119 case CONST:
16120 arm_print_value (f, XEXP (x, 0));
16121 return;
16123 case PLUS:
16124 arm_print_value (f, XEXP (x, 0));
16125 fprintf (f, "+");
16126 arm_print_value (f, XEXP (x, 1));
16127 return;
16129 case PC:
16130 fprintf (f, "pc");
16131 return;
16133 default:
16134 fprintf (f, "????");
16135 return;
16139 /* Routines for manipulation of the constant pool. */
16141 /* Arm instructions cannot load a large constant directly into a
16142 register; they have to come from a pc relative load. The constant
16143 must therefore be placed in the addressable range of the pc
16144 relative load. Depending on the precise pc relative load
16145 instruction the range is somewhere between 256 bytes and 4k. This
16146 means that we often have to dump a constant inside a function, and
16147 generate code to branch around it.
16149 It is important to minimize this, since the branches will slow
16150 things down and make the code larger.
16152 Normally we can hide the table after an existing unconditional
16153 branch so that there is no interruption of the flow, but in the
16154 worst case the code looks like this:
16156 ldr rn, L1
16158 b L2
16159 align
16160 L1: .long value
16164 ldr rn, L3
16166 b L4
16167 align
16168 L3: .long value
16172 We fix this by performing a scan after scheduling, which notices
16173 which instructions need to have their operands fetched from the
16174 constant table and builds the table.
16176 The algorithm starts by building a table of all the constants that
16177 need fixing up and all the natural barriers in the function (places
16178 where a constant table can be dropped without breaking the flow).
16179 For each fixup we note how far the pc-relative replacement will be
16180 able to reach and the offset of the instruction into the function.
16182 Having built the table we then group the fixes together to form
16183 tables that are as large as possible (subject to addressing
16184 constraints) and emit each table of constants after the last
16185 barrier that is within range of all the instructions in the group.
16186 If a group does not contain a barrier, then we forcibly create one
16187 by inserting a jump instruction into the flow. Once the table has
16188 been inserted, the insns are then modified to reference the
16189 relevant entry in the pool.
16191 Possible enhancements to the algorithm (not implemented) are:
16193 1) For some processors and object formats, there may be benefit in
16194 aligning the pools to the start of cache lines; this alignment
16195 would need to be taken into account when calculating addressability
16196 of a pool. */
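/* For instance (editorial note, illustrative numbers): an insn at address
   1000 whose pool_range attribute is 4096 produces a fix whose max_address
   is 1000 + 4096 - minipool_pad, so its constant must be emitted before
   that point; push_minipool_fix and add_minipool_forward_ref below enforce
   exactly this constraint.  */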
16198 /* These typedefs are located at the start of this file, so that
16199 they can be used in the prototypes there. This comment is to
16200 remind readers of that fact so that the following structures
16201 can be understood more easily.
16203 typedef struct minipool_node Mnode;
16204 typedef struct minipool_fixup Mfix; */
16206 struct minipool_node
16208 /* Doubly linked chain of entries. */
16209 Mnode * next;
16210 Mnode * prev;
16211 /* The maximum offset into the code that this entry can be placed. While
16212 pushing fixes for forward references, all entries are sorted in order
16213 of increasing max_address. */
16214 HOST_WIDE_INT max_address;
16215 /* Similarly for an entry inserted for a backwards ref. */
16216 HOST_WIDE_INT min_address;
16217 /* The number of fixes referencing this entry. This can become zero
16218 if we "unpush" an entry. In this case we ignore the entry when we
16219 come to emit the code. */
16220 int refcount;
16221 /* The offset from the start of the minipool. */
16222 HOST_WIDE_INT offset;
16223 /* The value in table. */
16224 rtx value;
16225 /* The mode of value. */
16226 machine_mode mode;
16227 /* The size of the value. With iWMMXt enabled
16228 sizes > 4 also imply an alignment of 8 bytes. */
16229 int fix_size;
16232 struct minipool_fixup
16234 Mfix * next;
16235 rtx_insn * insn;
16236 HOST_WIDE_INT address;
16237 rtx * loc;
16238 machine_mode mode;
16239 int fix_size;
16240 rtx value;
16241 Mnode * minipool;
16242 HOST_WIDE_INT forwards;
16243 HOST_WIDE_INT backwards;
16246 /* Fixes less than a word need padding out to a word boundary. */
16247 #define MINIPOOL_FIX_SIZE(mode) \
16248 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
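/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4 (the fix is padded out to a word), while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */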
16250 static Mnode * minipool_vector_head;
16251 static Mnode * minipool_vector_tail;
16252 static rtx_code_label *minipool_vector_label;
16253 static int minipool_pad;
16255 /* The linked list of all minipool fixes required for this function. */
16256 Mfix * minipool_fix_head;
16257 Mfix * minipool_fix_tail;
16258 /* The fix entry for the current minipool, once it has been placed. */
16259 Mfix * minipool_barrier;
16261 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16262 #define JUMP_TABLES_IN_TEXT_SECTION 0
16263 #endif
16265 static HOST_WIDE_INT
16266 get_jump_table_size (rtx_jump_table_data *insn)
16268 /* ADDR_VECs only take room if read-only data goes into the text
16269 section. */
16270 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16272 rtx body = PATTERN (insn);
16273 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16274 HOST_WIDE_INT size;
16275 HOST_WIDE_INT modesize;
16277 modesize = GET_MODE_SIZE (GET_MODE (body));
16278 size = modesize * XVECLEN (body, elt);
16279 switch (modesize)
16281 case 1:
16282 /* Round up size of TBB table to a halfword boundary. */
16283 size = (size + 1) & ~(HOST_WIDE_INT)1;
16284 break;
16285 case 2:
16286 /* No padding necessary for TBH. */
16287 break;
16288 case 4:
16289 /* Add two bytes for alignment on Thumb. */
16290 if (TARGET_THUMB)
16291 size += 2;
16292 break;
16293 default:
16294 gcc_unreachable ();
16296 return size;
16299 return 0;
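/* Worked example (editorial note): with jump tables in the text section, a
   QImode ADDR_DIFF_VEC (a TBB-style table) with five entries occupies 5
   bytes, rounded up to 6 to keep the following code half-word aligned; an
   SImode table with five entries occupies 20 bytes, plus 2 alignment bytes
   on Thumb.  */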
16302 /* Return the maximum amount of padding that will be inserted before
16303 label LABEL. */
16305 static HOST_WIDE_INT
16306 get_label_padding (rtx label)
16308 HOST_WIDE_INT align, min_insn_size;
16310 align = 1 << label_to_alignment (label);
16311 min_insn_size = TARGET_THUMB ? 2 : 4;
16312 return align > min_insn_size ? align - min_insn_size : 0;
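/* For example, a label aligned to 2^3 = 8 bytes can be preceded by at most
   8 - 2 = 6 bytes of padding on Thumb (minimum insn size 2), or
   8 - 4 = 4 bytes on ARM.  */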
16315 /* Move a minipool fix MP from its current location to before MAX_MP.
16316 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16317 constraints may need updating. */
16318 static Mnode *
16319 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16320 HOST_WIDE_INT max_address)
16322 /* The code below assumes these are different. */
16323 gcc_assert (mp != max_mp);
16325 if (max_mp == NULL)
16327 if (max_address < mp->max_address)
16328 mp->max_address = max_address;
16330 else
16332 if (max_address > max_mp->max_address - mp->fix_size)
16333 mp->max_address = max_mp->max_address - mp->fix_size;
16334 else
16335 mp->max_address = max_address;
16337 /* Unlink MP from its current position. Since max_mp is non-null,
16338 mp->prev must be non-null. */
16339 mp->prev->next = mp->next;
16340 if (mp->next != NULL)
16341 mp->next->prev = mp->prev;
16342 else
16343 minipool_vector_tail = mp->prev;
16345 /* Re-insert it before MAX_MP. */
16346 mp->next = max_mp;
16347 mp->prev = max_mp->prev;
16348 max_mp->prev = mp;
16350 if (mp->prev != NULL)
16351 mp->prev->next = mp;
16352 else
16353 minipool_vector_head = mp;
16356 /* Save the new entry. */
16357 max_mp = mp;
16359 /* Scan over the preceding entries and adjust their addresses as
16360 required. */
16361 while (mp->prev != NULL
16362 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16364 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16365 mp = mp->prev;
16368 return max_mp;
16371 /* Add a constant to the minipool for a forward reference. Returns the
16372 node added or NULL if the constant will not fit in this pool. */
16373 static Mnode *
16374 add_minipool_forward_ref (Mfix *fix)
16376 /* If set, max_mp is the first pool_entry that has a lower
16377 constraint than the one we are trying to add. */
16378 Mnode * max_mp = NULL;
16379 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16380 Mnode * mp;
16382 /* If the minipool starts before the end of FIX->INSN then this FIX
16383 can not be placed into the current pool. Furthermore, adding the
16384 new constant pool entry may cause the pool to start FIX_SIZE bytes
16385 earlier. */
16386 if (minipool_vector_head &&
16387 (fix->address + get_attr_length (fix->insn)
16388 >= minipool_vector_head->max_address - fix->fix_size))
16389 return NULL;
16391 /* Scan the pool to see if a constant with the same value has
16392 already been added. While we are doing this, also note the
16393 location where we must insert the constant if it doesn't already
16394 exist. */
16395 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16397 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16398 && fix->mode == mp->mode
16399 && (!LABEL_P (fix->value)
16400 || (CODE_LABEL_NUMBER (fix->value)
16401 == CODE_LABEL_NUMBER (mp->value)))
16402 && rtx_equal_p (fix->value, mp->value))
16404 /* More than one fix references this entry. */
16405 mp->refcount++;
16406 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16409 /* Note the insertion point if necessary. */
16410 if (max_mp == NULL
16411 && mp->max_address > max_address)
16412 max_mp = mp;
16414 /* If we are inserting an 8-byte aligned quantity and
16415 we have not already found an insertion point, then
16416 make sure that all such 8-byte aligned quantities are
16417 placed at the start of the pool. */
16418 if (ARM_DOUBLEWORD_ALIGN
16419 && max_mp == NULL
16420 && fix->fix_size >= 8
16421 && mp->fix_size < 8)
16423 max_mp = mp;
16424 max_address = mp->max_address;
16428 /* The value is not currently in the minipool, so we need to create
16429 a new entry for it. If MAX_MP is NULL, the entry will be put on
16430 the end of the list since the placement is less constrained than
16431 any existing entry. Otherwise, we insert the new fix before
16432 MAX_MP and, if necessary, adjust the constraints on the other
16433 entries. */
16434 mp = XNEW (Mnode);
16435 mp->fix_size = fix->fix_size;
16436 mp->mode = fix->mode;
16437 mp->value = fix->value;
16438 mp->refcount = 1;
16439 /* Not yet required for a backwards ref. */
16440 mp->min_address = -65536;
16442 if (max_mp == NULL)
16444 mp->max_address = max_address;
16445 mp->next = NULL;
16446 mp->prev = minipool_vector_tail;
16448 if (mp->prev == NULL)
16450 minipool_vector_head = mp;
16451 minipool_vector_label = gen_label_rtx ();
16453 else
16454 mp->prev->next = mp;
16456 minipool_vector_tail = mp;
16458 else
16460 if (max_address > max_mp->max_address - mp->fix_size)
16461 mp->max_address = max_mp->max_address - mp->fix_size;
16462 else
16463 mp->max_address = max_address;
16465 mp->next = max_mp;
16466 mp->prev = max_mp->prev;
16467 max_mp->prev = mp;
16468 if (mp->prev != NULL)
16469 mp->prev->next = mp;
16470 else
16471 minipool_vector_head = mp;
16474 /* Save the new entry. */
16475 max_mp = mp;
16477 /* Scan over the preceding entries and adjust their addresses as
16478 required. */
16479 while (mp->prev != NULL
16480 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16482 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16483 mp = mp->prev;
16486 return max_mp;
16489 static Mnode *
16490 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16491 HOST_WIDE_INT min_address)
16493 HOST_WIDE_INT offset;
16495 /* The code below assumes these are different. */
16496 gcc_assert (mp != min_mp);
16498 if (min_mp == NULL)
16500 if (min_address > mp->min_address)
16501 mp->min_address = min_address;
16503 else
16505 /* We will adjust this below if it is too loose. */
16506 mp->min_address = min_address;
16508 /* Unlink MP from its current position. Since min_mp is non-null,
16509 mp->next must be non-null. */
16510 mp->next->prev = mp->prev;
16511 if (mp->prev != NULL)
16512 mp->prev->next = mp->next;
16513 else
16514 minipool_vector_head = mp->next;
16516 /* Reinsert it after MIN_MP. */
16517 mp->prev = min_mp;
16518 mp->next = min_mp->next;
16519 min_mp->next = mp;
16520 if (mp->next != NULL)
16521 mp->next->prev = mp;
16522 else
16523 minipool_vector_tail = mp;
16526 min_mp = mp;
16528 offset = 0;
16529 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16531 mp->offset = offset;
16532 if (mp->refcount > 0)
16533 offset += mp->fix_size;
16535 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16536 mp->next->min_address = mp->min_address + mp->fix_size;
16539 return min_mp;
16542 /* Add a constant to the minipool for a backward reference. Returns the
16543 node added or NULL if the constant will not fit in this pool.
16545 Note that the code for insertion for a backwards reference can be
16546 somewhat confusing because the calculated offsets for each fix do
16547 not take into account the size of the pool (which is still under
16548 construction). */
16549 static Mnode *
16550 add_minipool_backward_ref (Mfix *fix)
16552 /* If set, min_mp is the last pool_entry that has a lower constraint
16553 than the one we are trying to add. */
16554 Mnode *min_mp = NULL;
16555 /* This can be negative, since it is only a constraint. */
16556 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16557 Mnode *mp;
16559 /* If we can't reach the current pool from this insn, or if we can't
16560 insert this entry at the end of the pool without pushing other
16561 fixes out of range, then we don't try. This ensures that we
16562 can't fail later on. */
16563 if (min_address >= minipool_barrier->address
16564 || (minipool_vector_tail->min_address + fix->fix_size
16565 >= minipool_barrier->address))
16566 return NULL;
16568 /* Scan the pool to see if a constant with the same value has
16569 already been added. While we are doing this, also note the
16570 location where we must insert the constant if it doesn't already
16571 exist. */
16572 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16574 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16575 && fix->mode == mp->mode
16576 && (!LABEL_P (fix->value)
16577 || (CODE_LABEL_NUMBER (fix->value)
16578 == CODE_LABEL_NUMBER (mp->value)))
16579 && rtx_equal_p (fix->value, mp->value)
16580 /* Check that there is enough slack to move this entry to the
16581 end of the table (this is conservative). */
16582 && (mp->max_address
16583 > (minipool_barrier->address
16584 + minipool_vector_tail->offset
16585 + minipool_vector_tail->fix_size)))
16587 mp->refcount++;
16588 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16591 if (min_mp != NULL)
16592 mp->min_address += fix->fix_size;
16593 else
16595 /* Note the insertion point if necessary. */
16596 if (mp->min_address < min_address)
16598 /* For now, we do not allow the insertion of 8-byte alignment
16599 requiring nodes anywhere but at the start of the pool. */
16600 if (ARM_DOUBLEWORD_ALIGN
16601 && fix->fix_size >= 8 && mp->fix_size < 8)
16602 return NULL;
16603 else
16604 min_mp = mp;
16606 else if (mp->max_address
16607 < minipool_barrier->address + mp->offset + fix->fix_size)
16609 /* Inserting before this entry would push the fix beyond
16610 its maximum address (which can happen if we have
16611 re-located a forwards fix); force the new fix to come
16612 after it. */
16613 if (ARM_DOUBLEWORD_ALIGN
16614 && fix->fix_size >= 8 && mp->fix_size < 8)
16615 return NULL;
16616 else
16618 min_mp = mp;
16619 min_address = mp->min_address + fix->fix_size;
16622 /* Do not insert a non-8-byte aligned quantity before 8-byte
16623 aligned quantities. */
16624 else if (ARM_DOUBLEWORD_ALIGN
16625 && fix->fix_size < 8
16626 && mp->fix_size >= 8)
16628 min_mp = mp;
16629 min_address = mp->min_address + fix->fix_size;
16634 /* We need to create a new entry. */
16635 mp = XNEW (Mnode);
16636 mp->fix_size = fix->fix_size;
16637 mp->mode = fix->mode;
16638 mp->value = fix->value;
16639 mp->refcount = 1;
16640 mp->max_address = minipool_barrier->address + 65536;
16642 mp->min_address = min_address;
16644 if (min_mp == NULL)
16646 mp->prev = NULL;
16647 mp->next = minipool_vector_head;
16649 if (mp->next == NULL)
16651 minipool_vector_tail = mp;
16652 minipool_vector_label = gen_label_rtx ();
16654 else
16655 mp->next->prev = mp;
16657 minipool_vector_head = mp;
16659 else
16661 mp->next = min_mp->next;
16662 mp->prev = min_mp;
16663 min_mp->next = mp;
16665 if (mp->next != NULL)
16666 mp->next->prev = mp;
16667 else
16668 minipool_vector_tail = mp;
16671 /* Save the new entry. */
16672 min_mp = mp;
16674 if (mp->prev)
16675 mp = mp->prev;
16676 else
16677 mp->offset = 0;
16679 /* Scan over the following entries and adjust their offsets. */
16680 while (mp->next != NULL)
16682 if (mp->next->min_address < mp->min_address + mp->fix_size)
16683 mp->next->min_address = mp->min_address + mp->fix_size;
16685 if (mp->refcount)
16686 mp->next->offset = mp->offset + mp->fix_size;
16687 else
16688 mp->next->offset = mp->offset;
16690 mp = mp->next;
16693 return min_mp;
16696 static void
16697 assign_minipool_offsets (Mfix *barrier)
16699 HOST_WIDE_INT offset = 0;
16700 Mnode *mp;
16702 minipool_barrier = barrier;
16704 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16706 mp->offset = offset;
16708 if (mp->refcount > 0)
16709 offset += mp->fix_size;
16713 /* Output the literal table */
16714 static void
16715 dump_minipool (rtx_insn *scan)
16717 Mnode * mp;
16718 Mnode * nmp;
16719 int align64 = 0;
16721 if (ARM_DOUBLEWORD_ALIGN)
16722 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16723 if (mp->refcount > 0 && mp->fix_size >= 8)
16725 align64 = 1;
16726 break;
16729 if (dump_file)
16730 fprintf (dump_file,
16731 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16732 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16734 scan = emit_label_after (gen_label_rtx (), scan);
16735 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16736 scan = emit_label_after (minipool_vector_label, scan);
16738 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16740 if (mp->refcount > 0)
16742 if (dump_file)
16744 fprintf (dump_file,
16745 ";; Offset %u, min %ld, max %ld ",
16746 (unsigned) mp->offset, (unsigned long) mp->min_address,
16747 (unsigned long) mp->max_address);
16748 arm_print_value (dump_file, mp->value);
16749 fputc ('\n', dump_file);
16752 switch (GET_MODE_SIZE (mp->mode))
16754 #ifdef HAVE_consttable_1
16755 case 1:
16756 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16757 break;
16759 #endif
16760 #ifdef HAVE_consttable_2
16761 case 2:
16762 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16763 break;
16765 #endif
16766 #ifdef HAVE_consttable_4
16767 case 4:
16768 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16769 break;
16771 #endif
16772 #ifdef HAVE_consttable_8
16773 case 8:
16774 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16775 break;
16777 #endif
16778 #ifdef HAVE_consttable_16
16779 case 16:
16780 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16781 break;
16783 #endif
16784 default:
16785 gcc_unreachable ();
16789 nmp = mp->next;
16790 free (mp);
16793 minipool_vector_head = minipool_vector_tail = NULL;
16794 scan = emit_insn_after (gen_consttable_end (), scan);
16795 scan = emit_barrier_after (scan);
16798 /* Return the cost of forcibly inserting a barrier after INSN. */
16799 static int
16800 arm_barrier_cost (rtx insn)
16802 /* Basing the location of the pool on the loop depth is preferable,
16803 but at the moment, the basic block information seems to be
16804 corrupted by this stage of the compilation. */
16805 int base_cost = 50;
16806 rtx next = next_nonnote_insn (insn);
16808 if (next != NULL && LABEL_P (next))
16809 base_cost -= 20;
16811 switch (GET_CODE (insn))
16813 case CODE_LABEL:
16814 /* It will always be better to place the table before the label, rather
16815 than after it. */
16816 return 50;
16818 case INSN:
16819 case CALL_INSN:
16820 return base_cost;
16822 case JUMP_INSN:
16823 return base_cost - 10;
16825 default:
16826 return base_cost + 10;
16830 /* Find the best place in the insn stream in the range
16831 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16832 Create the barrier by inserting a jump and add a new fix entry for
16833 it. */
16834 static Mfix *
16835 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16837 HOST_WIDE_INT count = 0;
16838 rtx_barrier *barrier;
16839 rtx_insn *from = fix->insn;
16840 /* The instruction after which we will insert the jump. */
16841 rtx_insn *selected = NULL;
16842 int selected_cost;
16843 /* The address at which the jump instruction will be placed. */
16844 HOST_WIDE_INT selected_address;
16845 Mfix * new_fix;
16846 HOST_WIDE_INT max_count = max_address - fix->address;
16847 rtx_code_label *label = gen_label_rtx ();
16849 selected_cost = arm_barrier_cost (from);
16850 selected_address = fix->address;
16852 while (from && count < max_count)
16854 rtx_jump_table_data *tmp;
16855 int new_cost;
16857 /* This code shouldn't have been called if there was a natural barrier
16858 within range. */
16859 gcc_assert (!BARRIER_P (from));
16861 /* Count the length of this insn. This must stay in sync with the
16862 code that pushes minipool fixes. */
16863 if (LABEL_P (from))
16864 count += get_label_padding (from);
16865 else
16866 count += get_attr_length (from);
16868 /* If there is a jump table, add its length. */
16869 if (tablejump_p (from, NULL, &tmp))
16871 count += get_jump_table_size (tmp);
16873 /* Jump tables aren't in a basic block, so base the cost on
16874 the dispatch insn. If we select this location, we will
16875 still put the pool after the table. */
16876 new_cost = arm_barrier_cost (from);
16878 if (count < max_count
16879 && (!selected || new_cost <= selected_cost))
16881 selected = tmp;
16882 selected_cost = new_cost;
16883 selected_address = fix->address + count;
16886 /* Continue after the dispatch table. */
16887 from = NEXT_INSN (tmp);
16888 continue;
16891 new_cost = arm_barrier_cost (from);
16893 if (count < max_count
16894 && (!selected || new_cost <= selected_cost))
16896 selected = from;
16897 selected_cost = new_cost;
16898 selected_address = fix->address + count;
16901 from = NEXT_INSN (from);
16904 /* Make sure that we found a place to insert the jump. */
16905 gcc_assert (selected);
16907 /* Make sure we do not split a call and its corresponding
16908 CALL_ARG_LOCATION note. */
16909 if (CALL_P (selected))
16911 rtx_insn *next = NEXT_INSN (selected);
16912 if (next && NOTE_P (next)
16913 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16914 selected = next;
16917 /* Create a new JUMP_INSN that branches around a barrier. */
16918 from = emit_jump_insn_after (gen_jump (label), selected);
16919 JUMP_LABEL (from) = label;
16920 barrier = emit_barrier_after (from);
16921 emit_label_after (label, barrier);
16923 /* Create a minipool barrier entry for the new barrier. */
16924 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16925 new_fix->insn = barrier;
16926 new_fix->address = selected_address;
16927 new_fix->next = fix->next;
16928 fix->next = new_fix;
16930 return new_fix;
16933 /* Record that there is a natural barrier in the insn stream at
16934 ADDRESS. */
16935 static void
16936 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16938 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16940 fix->insn = insn;
16941 fix->address = address;
16943 fix->next = NULL;
16944 if (minipool_fix_head != NULL)
16945 minipool_fix_tail->next = fix;
16946 else
16947 minipool_fix_head = fix;
16949 minipool_fix_tail = fix;
16952 /* Record INSN, which will need fixing up to load a value from the
16953 minipool. ADDRESS is the offset of the insn since the start of the
16954 function; LOC is a pointer to the part of the insn which requires
16955 fixing; VALUE is the constant that must be loaded, which is of type
16956 MODE. */
16957 static void
16958 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16959 machine_mode mode, rtx value)
16961 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16963 fix->insn = insn;
16964 fix->address = address;
16965 fix->loc = loc;
16966 fix->mode = mode;
16967 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16968 fix->value = value;
16969 fix->forwards = get_attr_pool_range (insn);
16970 fix->backwards = get_attr_neg_pool_range (insn);
16971 fix->minipool = NULL;
16973 /* If an insn doesn't have a range defined for it, then it isn't
16974 expecting to be reworked by this code. Better to stop now than
16975 to generate duff assembly code. */
16976 gcc_assert (fix->forwards || fix->backwards);
16978 /* If an entry requires 8-byte alignment then assume all constant pools
16979 require 4 bytes of padding. Trying to do this later on a per-pool
16980 basis is awkward because existing pool entries have to be modified. */
16981 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16982 minipool_pad = 4;
16984 if (dump_file)
16986 fprintf (dump_file,
16987 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16988 GET_MODE_NAME (mode),
16989 INSN_UID (insn), (unsigned long) address,
16990 -1 * (long)fix->backwards, (long)fix->forwards);
16991 arm_print_value (dump_file, fix->value);
16992 fprintf (dump_file, "\n");
16995 /* Add it to the chain of fixes. */
16996 fix->next = NULL;
16998 if (minipool_fix_head != NULL)
16999 minipool_fix_tail->next = fix;
17000 else
17001 minipool_fix_head = fix;
17003 minipool_fix_tail = fix;
17006 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17007 Returns the number of insns needed, or 99 if we always want to synthesize
17008 the value. */
17009 int
17010 arm_max_const_double_inline_cost ()
17012 /* Let the value get synthesized to avoid the use of literal pools. */
17013 if (arm_disable_literal_pool)
17014 return 99;
17016 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17019 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17020 Returns the number of insns needed, or 99 if we don't know how to
17021 do it. */
17022 int
17023 arm_const_double_inline_cost (rtx val)
17025 rtx lowpart, highpart;
17026 machine_mode mode;
17028 mode = GET_MODE (val);
17030 if (mode == VOIDmode)
17031 mode = DImode;
17033 gcc_assert (GET_MODE_SIZE (mode) == 8);
17035 lowpart = gen_lowpart (SImode, val);
17036 highpart = gen_highpart_mode (SImode, mode, val);
17038 gcc_assert (CONST_INT_P (lowpart));
17039 gcc_assert (CONST_INT_P (highpart));
17041 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17042 NULL_RTX, NULL_RTX, 0, 0)
17043 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17044 NULL_RTX, NULL_RTX, 0, 0));
17047 /* Cost of loading a SImode constant. */
17048 static inline int
17049 arm_const_inline_cost (enum rtx_code code, rtx val)
17051 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17052 NULL_RTX, NULL_RTX, 1, 0);
17055 /* Return true if it is worthwhile to split a 64-bit constant into two
17056 32-bit operations. This is the case if optimizing for size, or
17057 if we have load delay slots, or if one 32-bit part can be done with
17058 a single data operation. */
17059 bool
17060 arm_const_double_by_parts (rtx val)
17062 machine_mode mode = GET_MODE (val);
17063 rtx part;
17065 if (optimize_size || arm_ld_sched)
17066 return true;
17068 if (mode == VOIDmode)
17069 mode = DImode;
17071 part = gen_highpart_mode (SImode, mode, val);
17073 gcc_assert (CONST_INT_P (part));
17075 if (const_ok_for_arm (INTVAL (part))
17076 || const_ok_for_arm (~INTVAL (part)))
17077 return true;
17079 part = gen_lowpart (SImode, val);
17081 gcc_assert (CONST_INT_P (part));
17083 if (const_ok_for_arm (INTVAL (part))
17084 || const_ok_for_arm (~INTVAL (part)))
17085 return true;
17087 return false;
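/* For example (editorial note), 0x000000ff00000001 is worth splitting:
   its high word 0xff is a valid ARM immediate, so that half takes a single
   data-processing insn and the function returns true even when not
   optimizing for size.  */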
17090 /* Return true if it is possible to inline both the high and low parts
17091 of a 64-bit constant into 32-bit data processing instructions. */
17092 bool
17093 arm_const_double_by_immediates (rtx val)
17095 machine_mode mode = GET_MODE (val);
17096 rtx part;
17098 if (mode == VOIDmode)
17099 mode = DImode;
17101 part = gen_highpart_mode (SImode, mode, val);
17103 gcc_assert (CONST_INT_P (part));
17105 if (!const_ok_for_arm (INTVAL (part)))
17106 return false;
17108 part = gen_lowpart (SImode, val);
17110 gcc_assert (CONST_INT_P (part));
17112 if (!const_ok_for_arm (INTVAL (part)))
17113 return false;
17115 return true;
17118 /* Scan INSN and note any of its operands that need fixing.
17119 If DO_PUSHES is false we do not actually push any of the fixups
17120 needed. */
17121 static void
17122 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17124 int opno;
17126 extract_constrain_insn (insn);
17128 if (recog_data.n_alternatives == 0)
17129 return;
17131 /* Fill in recog_op_alt with information about the constraints of
17132 this insn. */
17133 preprocess_constraints (insn);
17135 const operand_alternative *op_alt = which_op_alt ();
17136 for (opno = 0; opno < recog_data.n_operands; opno++)
17138 /* Things we need to fix can only occur in inputs. */
17139 if (recog_data.operand_type[opno] != OP_IN)
17140 continue;
17142 /* If this alternative is a memory reference, then any mention
17143 of constants in this alternative is really to fool reload
17144 into allowing us to accept one there. We need to fix them up
17145 now so that we output the right code. */
17146 if (op_alt[opno].memory_ok)
17148 rtx op = recog_data.operand[opno];
17150 if (CONSTANT_P (op))
17152 if (do_pushes)
17153 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17154 recog_data.operand_mode[opno], op);
17156 else if (MEM_P (op)
17157 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17158 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17160 if (do_pushes)
17162 rtx cop = avoid_constant_pool_reference (op);
17164 /* Casting the address of something to a mode narrower
17165 than a word can cause avoid_constant_pool_reference()
17166 to return the pool reference itself. That's no good to
17167 us here. Let's just hope that we can use the
17168 constant pool value directly. */
17169 if (op == cop)
17170 cop = get_pool_constant (XEXP (op, 0));
17172 push_minipool_fix (insn, address,
17173 recog_data.operand_loc[opno],
17174 recog_data.operand_mode[opno], cop);
17181 return;
17184 /* Rewrite move insn into subtract of 0 if the condition codes will
17185 be useful in the next conditional jump insn. */
17187 static void
17188 thumb1_reorg (void)
17190 basic_block bb;
17192 FOR_EACH_BB_FN (bb, cfun)
17194 rtx dest, src;
17195 rtx pat, op0, set = NULL;
17196 rtx_insn *prev, *insn = BB_END (bb);
17197 bool insn_clobbered = false;
17199 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17200 insn = PREV_INSN (insn);
17202 /* Find the last cbranchsi4_insn in basic block BB. */
17203 if (insn == BB_HEAD (bb)
17204 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17205 continue;
17207 /* Get the register with which we are comparing. */
17208 pat = PATTERN (insn);
17209 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17211 /* Find the first flag setting insn before INSN in basic block BB. */
17212 gcc_assert (insn != BB_HEAD (bb));
17213 for (prev = PREV_INSN (insn);
17214 (!insn_clobbered
17215 && prev != BB_HEAD (bb)
17216 && (NOTE_P (prev)
17217 || DEBUG_INSN_P (prev)
17218 || ((set = single_set (prev)) != NULL
17219 && get_attr_conds (prev) == CONDS_NOCOND)));
17220 prev = PREV_INSN (prev))
17222 if (reg_set_p (op0, prev))
17223 insn_clobbered = true;
17226 /* Skip if op0 is clobbered by insn other than prev. */
17227 if (insn_clobbered)
17228 continue;
17230 if (!set)
17231 continue;
17233 dest = SET_DEST (set);
17234 src = SET_SRC (set);
17235 if (!low_register_operand (dest, SImode)
17236 || !low_register_operand (src, SImode))
17237 continue;
17239 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17240 in INSN. Both src and dest of the move insn are checked. */
17241 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17243 dest = copy_rtx (dest);
17244 src = copy_rtx (src);
17245 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17246 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17247 INSN_CODE (prev) = -1;
17248 /* Set test register in INSN to dest. */
17249 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17250 INSN_CODE (insn) = -1;
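/* Editorial sketch of the transformation (register numbers made up): a
   block ending in

       mov  r1, r0
       cmp  r1, #0        @ part of the cbranchsi4_insn
       beq  .L1

   has its move rewritten as "subs r1, r0, #0", which sets the condition
   codes itself, so the following conditional branch can use those flags
   instead of a separate compare against zero.  */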
17255 /* Convert instructions to their cc-clobbering variant if possible, since
17256 that allows us to use smaller encodings. */
17258 static void
17259 thumb2_reorg (void)
17261 basic_block bb;
17262 regset_head live;
17264 INIT_REG_SET (&live);
17266 /* We are freeing block_for_insn in the toplev to keep compatibility
17267 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17268 compute_bb_for_insn ();
17269 df_analyze ();
17271 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17273 FOR_EACH_BB_FN (bb, cfun)
17275 if (current_tune->disparage_flag_setting_t16_encodings
17276 && optimize_bb_for_speed_p (bb))
17277 continue;
17279 rtx_insn *insn;
17280 Convert_Action action = SKIP;
17281 Convert_Action action_for_partial_flag_setting
17282 = (current_tune->disparage_partial_flag_setting_t16_encodings
17283 && optimize_bb_for_speed_p (bb))
17284 ? SKIP : CONV;
17286 COPY_REG_SET (&live, DF_LR_OUT (bb));
17287 df_simulate_initialize_backwards (bb, &live);
17288 FOR_BB_INSNS_REVERSE (bb, insn)
17290 if (NONJUMP_INSN_P (insn)
17291 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17292 && GET_CODE (PATTERN (insn)) == SET)
17294 action = SKIP;
17295 rtx pat = PATTERN (insn);
17296 rtx dst = XEXP (pat, 0);
17297 rtx src = XEXP (pat, 1);
17298 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17300 if (UNARY_P (src) || BINARY_P (src))
17301 op0 = XEXP (src, 0);
17303 if (BINARY_P (src))
17304 op1 = XEXP (src, 1);
17306 if (low_register_operand (dst, SImode))
17308 switch (GET_CODE (src))
17310 case PLUS:
17311 /* Adding two registers and storing the result
17312 in the first source is already a 16-bit
17313 operation. */
17314 if (rtx_equal_p (dst, op0)
17315 && register_operand (op1, SImode))
17316 break;
17318 if (low_register_operand (op0, SImode))
17320 /* ADDS <Rd>,<Rn>,<Rm> */
17321 if (low_register_operand (op1, SImode))
17322 action = CONV;
17323 /* ADDS <Rdn>,#<imm8> */
17324 /* SUBS <Rdn>,#<imm8> */
17325 else if (rtx_equal_p (dst, op0)
17326 && CONST_INT_P (op1)
17327 && IN_RANGE (INTVAL (op1), -255, 255))
17328 action = CONV;
17329 /* ADDS <Rd>,<Rn>,#<imm3> */
17330 /* SUBS <Rd>,<Rn>,#<imm3> */
17331 else if (CONST_INT_P (op1)
17332 && IN_RANGE (INTVAL (op1), -7, 7))
17333 action = CONV;
17335 /* ADCS <Rd>, <Rn> */
17336 else if (GET_CODE (XEXP (src, 0)) == PLUS
17337 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17338 && low_register_operand (XEXP (XEXP (src, 0), 1),
17339 SImode)
17340 && COMPARISON_P (op1)
17341 && cc_register (XEXP (op1, 0), VOIDmode)
17342 && maybe_get_arm_condition_code (op1) == ARM_CS
17343 && XEXP (op1, 1) == const0_rtx)
17344 action = CONV;
17345 break;
17347 case MINUS:
17348 /* RSBS <Rd>,<Rn>,#0
17349 Not handled here: see NEG below. */
17350 /* SUBS <Rd>,<Rn>,#<imm3>
17351 SUBS <Rdn>,#<imm8>
17352 Not handled here: see PLUS above. */
17353 /* SUBS <Rd>,<Rn>,<Rm> */
17354 if (low_register_operand (op0, SImode)
17355 && low_register_operand (op1, SImode))
17356 action = CONV;
17357 break;
17359 case MULT:
17360 /* MULS <Rdm>,<Rn>,<Rdm>
17361 As an exception to the rule, this is only used
17362 when optimizing for size since MULS is slow on all
17363 known implementations. We do not even want to use
17364 MULS in cold code, if optimizing for speed, so we
17365 test the global flag here. */
17366 if (!optimize_size)
17367 break;
17368 /* else fall through. */
17369 case AND:
17370 case IOR:
17371 case XOR:
17372 /* ANDS <Rdn>,<Rm> */
17373 if (rtx_equal_p (dst, op0)
17374 && low_register_operand (op1, SImode))
17375 action = action_for_partial_flag_setting;
17376 else if (rtx_equal_p (dst, op1)
17377 && low_register_operand (op0, SImode))
17378 action = action_for_partial_flag_setting == SKIP
17379 ? SKIP : SWAP_CONV;
17380 break;
17382 case ASHIFTRT:
17383 case ASHIFT:
17384 case LSHIFTRT:
17385 /* ASRS <Rdn>,<Rm> */
17386 /* LSRS <Rdn>,<Rm> */
17387 /* LSLS <Rdn>,<Rm> */
17388 if (rtx_equal_p (dst, op0)
17389 && low_register_operand (op1, SImode))
17390 action = action_for_partial_flag_setting;
17391 /* ASRS <Rd>,<Rm>,#<imm5> */
17392 /* LSRS <Rd>,<Rm>,#<imm5> */
17393 /* LSLS <Rd>,<Rm>,#<imm5> */
17394 else if (low_register_operand (op0, SImode)
17395 && CONST_INT_P (op1)
17396 && IN_RANGE (INTVAL (op1), 0, 31))
17397 action = action_for_partial_flag_setting;
17398 break;
17400 case ROTATERT:
17401 /* RORS <Rdn>,<Rm> */
17402 if (rtx_equal_p (dst, op0)
17403 && low_register_operand (op1, SImode))
17404 action = action_for_partial_flag_setting;
17405 break;
17407 case NOT:
17408 /* MVNS <Rd>,<Rm> */
17409 if (low_register_operand (op0, SImode))
17410 action = action_for_partial_flag_setting;
17411 break;
17413 case NEG:
17414 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17415 if (low_register_operand (op0, SImode))
17416 action = CONV;
17417 break;
17419 case CONST_INT:
17420 /* MOVS <Rd>,#<imm8> */
17421 if (CONST_INT_P (src)
17422 && IN_RANGE (INTVAL (src), 0, 255))
17423 action = action_for_partial_flag_setting;
17424 break;
17426 case REG:
17427 /* MOVS and MOV<c> with registers have different
17428 encodings, so are not relevant here. */
17429 break;
17431 default:
17432 break;
17436 if (action != SKIP)
17438 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17439 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17440 rtvec vec;
17442 if (action == SWAP_CONV)
17444 src = copy_rtx (src);
17445 XEXP (src, 0) = op1;
17446 XEXP (src, 1) = op0;
17447 pat = gen_rtx_SET (VOIDmode, dst, src);
17448 vec = gen_rtvec (2, pat, clobber);
17450 else /* action == CONV */
17451 vec = gen_rtvec (2, pat, clobber);
17453 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17454 INSN_CODE (insn) = -1;
17458 if (NONDEBUG_INSN_P (insn))
17459 df_simulate_one_insn_backwards (bb, insn, &live);
17463 CLEAR_REG_SET (&live);
17466 /* Gcc puts the pool in the wrong place for ARM, since we can only
17467 load from addresses within a limited distance of the pc. We do some
17468 special munging to move the constant pool values to the correct
17469 point in the code. */
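/* Illustrative sketch of the effect: a constant that cannot be encoded as
   an immediate ends up as a pc-relative load from a "minipool" dumped near
   the insn that needs it, e.g. (labels are illustrative)
       ldr     r0, .LCP0
       ...
       .LCP0:  .word   0x12345678
   The code below records every such reference (a "fix") and then places
   the pools so that each load stays within its addressing range.  */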
17470 static void
17471 arm_reorg (void)
17473 rtx_insn *insn;
17474 HOST_WIDE_INT address = 0;
17475 Mfix * fix;
17477 if (TARGET_THUMB1)
17478 thumb1_reorg ();
17479 else if (TARGET_THUMB2)
17480 thumb2_reorg ();
17482 /* Ensure all insns that must be split have been split at this point.
17483 Otherwise, the pool placement code below may compute incorrect
17484 insn lengths. Note that when optimizing, all insns have already
17485 been split at this point. */
17486 if (!optimize)
17487 split_all_insns_noflow ();
17489 minipool_fix_head = minipool_fix_tail = NULL;
17491 /* The first insn must always be a note, or the code below won't
17492 scan it properly. */
17493 insn = get_insns ();
17494 gcc_assert (NOTE_P (insn));
17495 minipool_pad = 0;
17497 /* Scan all the insns and record the operands that will need fixing. */
17498 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17500 if (BARRIER_P (insn))
17501 push_minipool_barrier (insn, address);
17502 else if (INSN_P (insn))
17504 rtx_jump_table_data *table;
17506 note_invalid_constants (insn, address, true);
17507 address += get_attr_length (insn);
17509 /* If the insn is a vector jump, add the size of the table
17510 and skip the table. */
17511 if (tablejump_p (insn, NULL, &table))
17513 address += get_jump_table_size (table);
17514 insn = table;
17517 else if (LABEL_P (insn))
17518 /* Add the worst-case padding due to alignment. We don't add
17519 the _current_ padding because the minipool insertions
17520 themselves might change it. */
17521 address += get_label_padding (insn);
17524 fix = minipool_fix_head;
17526 /* Now scan the fixups and perform the required changes. */
17527 while (fix)
17529 Mfix * ftmp;
17530 Mfix * fdel;
17531 Mfix * last_added_fix;
17532 Mfix * last_barrier = NULL;
17533 Mfix * this_fix;
17535 /* Skip any further barriers before the next fix. */
17536 while (fix && BARRIER_P (fix->insn))
17537 fix = fix->next;
17539 /* No more fixes. */
17540 if (fix == NULL)
17541 break;
17543 last_added_fix = NULL;
17545 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17547 if (BARRIER_P (ftmp->insn))
17549 if (ftmp->address >= minipool_vector_head->max_address)
17550 break;
17552 last_barrier = ftmp;
17554 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17555 break;
17557 last_added_fix = ftmp; /* Keep track of the last fix added. */
17560 /* If we found a barrier, drop back to that; any fixes that we
17561 could have reached but come after the barrier will now go in
17562 the next mini-pool. */
17563 if (last_barrier != NULL)
17565 /* Reduce the refcount for those fixes that won't go into this
17566 pool after all. */
17567 for (fdel = last_barrier->next;
17568 fdel && fdel != ftmp;
17569 fdel = fdel->next)
17571 fdel->minipool->refcount--;
17572 fdel->minipool = NULL;
17575 ftmp = last_barrier;
17577 else
17579 /* ftmp is the first fix that we can't fit into this pool and
17580 there are no natural barriers that we could use. Insert a
17581 new barrier in the code somewhere between the previous
17582 fix and this one, and arrange to jump around it. */
17583 HOST_WIDE_INT max_address;
17585 /* The last item on the list of fixes must be a barrier, so
17586 we can never run off the end of the list of fixes without
17587 last_barrier being set. */
17588 gcc_assert (ftmp);
17590 max_address = minipool_vector_head->max_address;
17591 /* Check that there isn't another fix that is in range that
17592 we couldn't fit into this pool because the pool was
17593 already too large: we need to put the pool before such an
17594 instruction. The pool itself may come just after the
17595 fix because create_fix_barrier also allows space for a
17596 jump instruction. */
17597 if (ftmp->address < max_address)
17598 max_address = ftmp->address + 1;
17600 last_barrier = create_fix_barrier (last_added_fix, max_address);
17603 assign_minipool_offsets (last_barrier);
17605 while (ftmp)
17607 if (!BARRIER_P (ftmp->insn)
17608 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17609 == NULL))
17610 break;
17612 ftmp = ftmp->next;
17615 /* Scan over the fixes we have identified for this pool, fixing them
17616 up and adding the constants to the pool itself. */
17617 for (this_fix = fix; this_fix && ftmp != this_fix;
17618 this_fix = this_fix->next)
17619 if (!BARRIER_P (this_fix->insn))
17621 rtx addr
17622 = plus_constant (Pmode,
17623 gen_rtx_LABEL_REF (VOIDmode,
17624 minipool_vector_label),
17625 this_fix->minipool->offset);
17626 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17629 dump_minipool (last_barrier->insn);
17630 fix = ftmp;
17633 /* From now on we must synthesize any constants that we can't handle
17634 directly. This can happen if the RTL gets split during final
17635 instruction generation. */
17636 cfun->machine->after_arm_reorg = 1;
17638 /* Free the minipool memory. */
17639 obstack_free (&minipool_obstack, minipool_startobj);
17642 /* Routines to output assembly language. */
17644 /* Return the string representation of the passed-in real value. */
17645 static const char *
17646 fp_const_from_val (REAL_VALUE_TYPE *r)
17648 if (!fp_consts_inited)
17649 init_fp_table ();
17651 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17652 return "0";
17655 /* OPERANDS[0] is the entire list of insns that constitute pop,
17656 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17657 is in the list, UPDATE is true iff the list contains explicit
17658 update of base register. */
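/* Illustrative outputs: "pop {r4, r5, pc}" when the base register is SP
   with writeback under unified syntax, otherwise an ldmfd/ldmia form with
   the base register and optional writeback; for interrupt returns a
   trailing "^" copies the SPSR back into the CPSR.  */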
17659 void
17660 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17661 bool update)
17663 int i;
17664 char pattern[100];
17665 int offset;
17666 const char *conditional;
17667 int num_saves = XVECLEN (operands[0], 0);
17668 unsigned int regno;
17669 unsigned int regno_base = REGNO (operands[1]);
17671 offset = 0;
17672 offset += update ? 1 : 0;
17673 offset += return_pc ? 1 : 0;
17675 /* Is the base register in the list? */
17676 for (i = offset; i < num_saves; i++)
17678 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17679 /* If SP is in the list, then the base register must be SP. */
17680 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17681 /* If base register is in the list, there must be no explicit update. */
17682 if (regno == regno_base)
17683 gcc_assert (!update);
17686 conditional = reverse ? "%?%D0" : "%?%d0";
17687 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17689 /* Output pop (not stmfd) because it has a shorter encoding. */
17690 gcc_assert (update);
17691 sprintf (pattern, "pop%s\t{", conditional);
17693 else
17695 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17696 It's just a convention; their semantics are identical. */
17697 if (regno_base == SP_REGNUM)
17698 sprintf (pattern, "ldm%sfd\t", conditional);
17699 else if (TARGET_UNIFIED_ASM)
17700 sprintf (pattern, "ldmia%s\t", conditional);
17701 else
17702 sprintf (pattern, "ldm%sia\t", conditional);
17704 strcat (pattern, reg_names[regno_base]);
17705 if (update)
17706 strcat (pattern, "!, {");
17707 else
17708 strcat (pattern, ", {");
17711 /* Output the first destination register. */
17712 strcat (pattern,
17713 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17715 /* Output the rest of the destination registers. */
17716 for (i = offset + 1; i < num_saves; i++)
17718 strcat (pattern, ", ");
17719 strcat (pattern,
17720 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17723 strcat (pattern, "}");
17725 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17726 strcat (pattern, "^");
17728 output_asm_insn (pattern, &cond);
17732 /* Output the assembly for a store multiple. */
17734 const char *
17735 vfp_output_vstmd (rtx * operands)
17737 char pattern[100];
17738 int p;
17739 int base;
17740 int i;
17741 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17742 ? XEXP (operands[0], 0)
17743 : XEXP (XEXP (operands[0], 0), 0);
17744 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17746 if (push_p)
17747 strcpy (pattern, "vpush%?.64\t{%P1");
17748 else
17749 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17751 p = strlen (pattern);
17753 gcc_assert (REG_P (operands[1]));
17755 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17756 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17758 p += sprintf (&pattern[p], ", d%d", base + i);
17760 strcpy (&pattern[p], "}");
17762 output_asm_insn (pattern, operands);
17763 return "";
17767 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17768 number of bytes pushed. */
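/* Illustrative example: saving three D registers emits one vpush-style
   store multiple that pre-decrements SP by 24 bytes, attaches the matching
   REG_FRAME_RELATED_EXPR note for the unwinder, and returns 24.  */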
17770 static int
17771 vfp_emit_fstmd (int base_reg, int count)
17773 rtx par;
17774 rtx dwarf;
17775 rtx tmp, reg;
17776 int i;
17778 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17779 register pairs are stored by a store multiple insn. We avoid this
17780 by pushing an extra pair. */
17781 if (count == 2 && !arm_arch6)
17783 if (base_reg == LAST_VFP_REGNUM - 3)
17784 base_reg -= 2;
17785 count++;
17788 /* FSTMD may not store more than 16 doubleword registers at once. Split
17789 larger stores into multiple parts (up to a maximum of two, in
17790 practice). */
17791 if (count > 16)
17793 int saved;
17794 /* NOTE: base_reg is an internal register number, so each D register
17795 counts as 2. */
17796 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17797 saved += vfp_emit_fstmd (base_reg, 16);
17798 return saved;
17801 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17802 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17804 reg = gen_rtx_REG (DFmode, base_reg);
17805 base_reg += 2;
17807 XVECEXP (par, 0, 0)
17808 = gen_rtx_SET (VOIDmode,
17809 gen_frame_mem
17810 (BLKmode,
17811 gen_rtx_PRE_MODIFY (Pmode,
17812 stack_pointer_rtx,
17813 plus_constant
17814 (Pmode, stack_pointer_rtx,
17815 - (count * 8)))
17817 gen_rtx_UNSPEC (BLKmode,
17818 gen_rtvec (1, reg),
17819 UNSPEC_PUSH_MULT));
17821 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17822 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17823 RTX_FRAME_RELATED_P (tmp) = 1;
17824 XVECEXP (dwarf, 0, 0) = tmp;
17826 tmp = gen_rtx_SET (VOIDmode,
17827 gen_frame_mem (DFmode, stack_pointer_rtx),
17828 reg);
17829 RTX_FRAME_RELATED_P (tmp) = 1;
17830 XVECEXP (dwarf, 0, 1) = tmp;
17832 for (i = 1; i < count; i++)
17834 reg = gen_rtx_REG (DFmode, base_reg);
17835 base_reg += 2;
17836 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17838 tmp = gen_rtx_SET (VOIDmode,
17839 gen_frame_mem (DFmode,
17840 plus_constant (Pmode,
17841 stack_pointer_rtx,
17842 i * 8)),
17843 reg);
17844 RTX_FRAME_RELATED_P (tmp) = 1;
17845 XVECEXP (dwarf, 0, i + 1) = tmp;
17848 par = emit_insn (par);
17849 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17850 RTX_FRAME_RELATED_P (par) = 1;
17852 return count * 8;
17855 /* Emit a call instruction with pattern PAT. ADDR is the address of
17856 the call target. */
17858 void
17859 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17861 rtx insn;
17863 insn = emit_call_insn (pat);
17865 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17866 If the call might use such an entry, add a use of the PIC register
17867 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17868 if (TARGET_VXWORKS_RTP
17869 && flag_pic
17870 && !sibcall
17871 && GET_CODE (addr) == SYMBOL_REF
17872 && (SYMBOL_REF_DECL (addr)
17873 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17874 : !SYMBOL_REF_LOCAL_P (addr)))
17876 require_pic_register ();
17877 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17880 if (TARGET_AAPCS_BASED)
17882 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17883 linker. We need to add an IP clobber to allow setting
17884 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17885 is not needed since it's a fixed register. */
17886 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17887 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17891 /* Output a 'call' insn. */
17892 const char *
17893 output_call (rtx *operands)
17895 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17897 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17898 if (REGNO (operands[0]) == LR_REGNUM)
17900 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17901 output_asm_insn ("mov%?\t%0, %|lr", operands);
17904 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17906 if (TARGET_INTERWORK || arm_arch4t)
17907 output_asm_insn ("bx%?\t%0", operands);
17908 else
17909 output_asm_insn ("mov%?\t%|pc, %0", operands);
17911 return "";
17914 /* Output a 'call' insn that is a reference in memory. This is
17915 disabled for ARMv5 and we prefer a blx instead because otherwise
17916 there's a significant performance overhead. */
17917 const char *
17918 output_call_mem (rtx *operands)
17920 gcc_assert (!arm_arch5);
17921 if (TARGET_INTERWORK)
17923 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17924 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17925 output_asm_insn ("bx%?\t%|ip", operands);
17927 else if (regno_use_in (LR_REGNUM, operands[0]))
17929 /* LR is used in the memory address. We load the address in the
17930 first instruction. It's safe to use IP as the target of the
17931 load since the call will kill it anyway. */
17932 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17933 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17934 if (arm_arch4t)
17935 output_asm_insn ("bx%?\t%|ip", operands);
17936 else
17937 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17939 else
17941 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17942 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17945 return "";
17949 /* Output a move from arm registers to arm registers of a long double.
17950 OPERANDS[0] is the destination.
17951 OPERANDS[1] is the source. */
17952 const char *
17953 output_mov_long_double_arm_from_arm (rtx *operands)
17955 /* We have to be careful here because the two might overlap. */
17956 int dest_start = REGNO (operands[0]);
17957 int src_start = REGNO (operands[1]);
17958 rtx ops[2];
17959 int i;
17961 if (dest_start < src_start)
17963 for (i = 0; i < 3; i++)
17965 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17966 ops[1] = gen_rtx_REG (SImode, src_start + i);
17967 output_asm_insn ("mov%?\t%0, %1", ops);
17970 else
17972 for (i = 2; i >= 0; i--)
17974 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17975 ops[1] = gen_rtx_REG (SImode, src_start + i);
17976 output_asm_insn ("mov%?\t%0, %1", ops);
17980 return "";
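/* Emit the instruction pair that loads a 32-bit value into DEST.  For a
   constant source this is (illustratively) a movw/movt sequence, e.g.
   loading 0x12345678 gives
       movw  rd, #0x5678
       movt  rd, #0x1234
   with the movt omitted when the high halfword is zero.  For symbolic
   sources a HIGH/LO_SUM pair is emitted instead.  */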
17983 void
17984 arm_emit_movpair (rtx dest, rtx src)
17986 /* If the src is an immediate, simplify it. */
17987 if (CONST_INT_P (src))
17989 HOST_WIDE_INT val = INTVAL (src);
17990 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17991 if ((val >> 16) & 0x0000ffff)
17992 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17993 GEN_INT (16)),
17994 GEN_INT ((val >> 16) & 0x0000ffff));
17995 return;
17997 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17998 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18001 /* Output a move between double words. It must be REG<-MEM
18002 or MEM<-REG. */
18003 const char *
18004 output_move_double (rtx *operands, bool emit, int *count)
18006 enum rtx_code code0 = GET_CODE (operands[0]);
18007 enum rtx_code code1 = GET_CODE (operands[1]);
18008 rtx otherops[3];
18009 if (count)
18010 *count = 1;
18012 /* The only case when this might happen is when
18013 you are looking at the length of a DImode instruction
18014 that has an invalid constant in it. */
18015 if (code0 == REG && code1 != MEM)
18017 gcc_assert (!emit);
18018 *count = 2;
18019 return "";
18022 if (code0 == REG)
18024 unsigned int reg0 = REGNO (operands[0]);
18026 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18028 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18030 switch (GET_CODE (XEXP (operands[1], 0)))
18032 case REG:
18034 if (emit)
18036 if (TARGET_LDRD
18037 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18038 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
18039 else
18040 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18042 break;
18044 case PRE_INC:
18045 gcc_assert (TARGET_LDRD);
18046 if (emit)
18047 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18048 break;
18050 case PRE_DEC:
18051 if (emit)
18053 if (TARGET_LDRD)
18054 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18055 else
18056 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18058 break;
18060 case POST_INC:
18061 if (emit)
18063 if (TARGET_LDRD)
18064 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18065 else
18066 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18068 break;
18070 case POST_DEC:
18071 gcc_assert (TARGET_LDRD);
18072 if (emit)
18073 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18074 break;
18076 case PRE_MODIFY:
18077 case POST_MODIFY:
18078 /* Autoincrement addressing modes should never have overlapping
18079 base and destination registers, and overlapping index registers
18080 are already prohibited, so this doesn't need to worry about
18081 fix_cm3_ldrd. */
18082 otherops[0] = operands[0];
18083 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18084 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18086 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18088 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18090 /* Registers overlap so split out the increment. */
18091 if (emit)
18093 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18094 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18096 if (count)
18097 *count = 2;
18099 else
18101 /* Use a single insn if we can.
18102 FIXME: IWMMXT allows offsets larger than ldrd can
18103 handle, fix these up with a pair of ldr. */
18104 if (TARGET_THUMB2
18105 || !CONST_INT_P (otherops[2])
18106 || (INTVAL (otherops[2]) > -256
18107 && INTVAL (otherops[2]) < 256))
18109 if (emit)
18110 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18112 else
18114 if (emit)
18116 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18117 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18119 if (count)
18120 *count = 2;
18125 else
18127 /* Use a single insn if we can.
18128 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18129 fix these up with a pair of ldr. */
18130 if (TARGET_THUMB2
18131 || !CONST_INT_P (otherops[2])
18132 || (INTVAL (otherops[2]) > -256
18133 && INTVAL (otherops[2]) < 256))
18135 if (emit)
18136 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18138 else
18140 if (emit)
18142 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18143 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18145 if (count)
18146 *count = 2;
18149 break;
18151 case LABEL_REF:
18152 case CONST:
18153 /* We might be able to use ldrd %0, %1 here. However the range is
18154 different to ldr/adr, and it is broken on some ARMv7-M
18155 implementations. */
18156 /* Use the second register of the pair to avoid problematic
18157 overlap. */
18158 otherops[1] = operands[1];
18159 if (emit)
18160 output_asm_insn ("adr%?\t%0, %1", otherops);
18161 operands[1] = otherops[0];
18162 if (emit)
18164 if (TARGET_LDRD)
18165 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18166 else
18167 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18170 if (count)
18171 *count = 2;
18172 break;
18174 /* ??? This needs checking for thumb2. */
18175 default:
18176 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18177 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18179 otherops[0] = operands[0];
18180 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18181 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18183 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18185 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18187 switch ((int) INTVAL (otherops[2]))
18189 case -8:
18190 if (emit)
18191 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18192 return "";
18193 case -4:
18194 if (TARGET_THUMB2)
18195 break;
18196 if (emit)
18197 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18198 return "";
18199 case 4:
18200 if (TARGET_THUMB2)
18201 break;
18202 if (emit)
18203 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18204 return "";
18207 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18208 operands[1] = otherops[0];
18209 if (TARGET_LDRD
18210 && (REG_P (otherops[2])
18211 || TARGET_THUMB2
18212 || (CONST_INT_P (otherops[2])
18213 && INTVAL (otherops[2]) > -256
18214 && INTVAL (otherops[2]) < 256)))
18216 if (reg_overlap_mentioned_p (operands[0],
18217 otherops[2]))
18219 /* Swap base and index registers over to
18220 avoid a conflict. */
18221 std::swap (otherops[1], otherops[2]);
18223 /* If both registers conflict, it will usually
18224 have been fixed by a splitter. */
18225 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18226 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18228 if (emit)
18230 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18231 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18233 if (count)
18234 *count = 2;
18236 else
18238 otherops[0] = operands[0];
18239 if (emit)
18240 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18242 return "";
18245 if (CONST_INT_P (otherops[2]))
18247 if (emit)
18249 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18250 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18251 else
18252 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18255 else
18257 if (emit)
18258 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18261 else
18263 if (emit)
18264 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18267 if (count)
18268 *count = 2;
18270 if (TARGET_LDRD)
18271 return "ldr%(d%)\t%0, [%1]";
18273 return "ldm%(ia%)\t%1, %M0";
18275 else
18277 otherops[1] = adjust_address (operands[1], SImode, 4);
18278 /* Take care of overlapping base/data reg. */
18279 if (reg_mentioned_p (operands[0], operands[1]))
18281 if (emit)
18283 output_asm_insn ("ldr%?\t%0, %1", otherops);
18284 output_asm_insn ("ldr%?\t%0, %1", operands);
18286 if (count)
18287 *count = 2;
18290 else
18292 if (emit)
18294 output_asm_insn ("ldr%?\t%0, %1", operands);
18295 output_asm_insn ("ldr%?\t%0, %1", otherops);
18297 if (count)
18298 *count = 2;
18303 else
18305 /* Constraints should ensure this. */
18306 gcc_assert (code0 == MEM && code1 == REG);
18307 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18308 || (TARGET_ARM && TARGET_LDRD));
18310 switch (GET_CODE (XEXP (operands[0], 0)))
18312 case REG:
18313 if (emit)
18315 if (TARGET_LDRD)
18316 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18317 else
18318 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18320 break;
18322 case PRE_INC:
18323 gcc_assert (TARGET_LDRD);
18324 if (emit)
18325 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18326 break;
18328 case PRE_DEC:
18329 if (emit)
18331 if (TARGET_LDRD)
18332 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18333 else
18334 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18336 break;
18338 case POST_INC:
18339 if (emit)
18341 if (TARGET_LDRD)
18342 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18343 else
18344 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18346 break;
18348 case POST_DEC:
18349 gcc_assert (TARGET_LDRD);
18350 if (emit)
18351 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18352 break;
18354 case PRE_MODIFY:
18355 case POST_MODIFY:
18356 otherops[0] = operands[1];
18357 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18358 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18360 /* IWMMXT allows offsets larger than ldrd can handle,
18361 fix these up with a pair of ldr. */
18362 if (!TARGET_THUMB2
18363 && CONST_INT_P (otherops[2])
18364 && (INTVAL(otherops[2]) <= -256
18365 || INTVAL(otherops[2]) >= 256))
18367 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18369 if (emit)
18371 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18372 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18374 if (count)
18375 *count = 2;
18377 else
18379 if (emit)
18381 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18382 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18384 if (count)
18385 *count = 2;
18388 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18390 if (emit)
18391 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18393 else
18395 if (emit)
18396 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18398 break;
18400 case PLUS:
18401 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18402 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18404 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18406 case -8:
18407 if (emit)
18408 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18409 return "";
18411 case -4:
18412 if (TARGET_THUMB2)
18413 break;
18414 if (emit)
18415 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18416 return "";
18418 case 4:
18419 if (TARGET_THUMB2)
18420 break;
18421 if (emit)
18422 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18423 return "";
18426 if (TARGET_LDRD
18427 && (REG_P (otherops[2])
18428 || TARGET_THUMB2
18429 || (CONST_INT_P (otherops[2])
18430 && INTVAL (otherops[2]) > -256
18431 && INTVAL (otherops[2]) < 256)))
18433 otherops[0] = operands[1];
18434 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18435 if (emit)
18436 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18437 return "";
18439 /* Fall through */
18441 default:
18442 otherops[0] = adjust_address (operands[0], SImode, 4);
18443 otherops[1] = operands[1];
18444 if (emit)
18446 output_asm_insn ("str%?\t%1, %0", operands);
18447 output_asm_insn ("str%?\t%H1, %0", otherops);
18449 if (count)
18450 *count = 2;
18454 return "";
18457 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18458 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18460 const char *
18461 output_move_quad (rtx *operands)
18463 if (REG_P (operands[0]))
18465 /* Load, or reg->reg move. */
18467 if (MEM_P (operands[1]))
18469 switch (GET_CODE (XEXP (operands[1], 0)))
18471 case REG:
18472 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18473 break;
18475 case LABEL_REF:
18476 case CONST:
18477 output_asm_insn ("adr%?\t%0, %1", operands);
18478 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18479 break;
18481 default:
18482 gcc_unreachable ();
18485 else
18487 rtx ops[2];
18488 int dest, src, i;
18490 gcc_assert (REG_P (operands[1]));
18492 dest = REGNO (operands[0]);
18493 src = REGNO (operands[1]);
18495 /* This seems pretty dumb, but hopefully GCC won't try to do it
18496 very often. */
18497 if (dest < src)
18498 for (i = 0; i < 4; i++)
18500 ops[0] = gen_rtx_REG (SImode, dest + i);
18501 ops[1] = gen_rtx_REG (SImode, src + i);
18502 output_asm_insn ("mov%?\t%0, %1", ops);
18504 else
18505 for (i = 3; i >= 0; i--)
18507 ops[0] = gen_rtx_REG (SImode, dest + i);
18508 ops[1] = gen_rtx_REG (SImode, src + i);
18509 output_asm_insn ("mov%?\t%0, %1", ops);
18513 else
18515 gcc_assert (MEM_P (operands[0]));
18516 gcc_assert (REG_P (operands[1]));
18517 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18519 switch (GET_CODE (XEXP (operands[0], 0)))
18521 case REG:
18522 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18523 break;
18525 default:
18526 gcc_unreachable ();
18530 return "";
18533 /* Output a VFP load or store instruction. */
18535 const char *
18536 output_move_vfp (rtx *operands)
18538 rtx reg, mem, addr, ops[2];
18539 int load = REG_P (operands[0]);
18540 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18541 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18542 const char *templ;
18543 char buff[50];
18544 machine_mode mode;
18546 reg = operands[!load];
18547 mem = operands[load];
18549 mode = GET_MODE (reg);
18551 gcc_assert (REG_P (reg));
18552 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18553 gcc_assert (mode == SFmode
18554 || mode == DFmode
18555 || mode == SImode
18556 || mode == DImode
18557 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18558 gcc_assert (MEM_P (mem));
18560 addr = XEXP (mem, 0);
18562 switch (GET_CODE (addr))
18564 case PRE_DEC:
18565 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18566 ops[0] = XEXP (addr, 0);
18567 ops[1] = reg;
18568 break;
18570 case POST_INC:
18571 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18572 ops[0] = XEXP (addr, 0);
18573 ops[1] = reg;
18574 break;
18576 default:
18577 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18578 ops[0] = reg;
18579 ops[1] = mem;
18580 break;
18583 sprintf (buff, templ,
18584 load ? "ld" : "st",
18585 dp ? "64" : "32",
18586 dp ? "P" : "",
18587 integer_p ? "\t%@ int" : "");
18588 output_asm_insn (buff, ops);
18590 return "";
18593 /* Output a Neon double-word or quad-word load or store, or a load
18594 or store for larger structure modes.
18596 WARNING: The ordering of elements is weird in big-endian mode,
18597 because the EABI requires that vectors stored in memory appear
18598 as though they were stored by a VSTM instruction.
18599 GCC RTL defines element ordering based on in-memory order.
18600 This can be different from the architectural ordering of elements
18601 within a NEON register. The intrinsics defined in arm_neon.h use the
18602 NEON register element ordering, not the GCC RTL element ordering.
18604 For example, the in-memory ordering of a big-endian quadword
18605 vector with 16-bit elements when stored from register pair {d0,d1}
18606 will be (lowest address first, d0[N] is NEON register element N):
18608 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18610 When necessary, quadword registers (dN, dN+1) are moved to ARM
18611 registers from rN in the order:
18613 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18615 So that STM/LDM can be used on vectors in ARM registers, and the
18616 same memory layout will result as if VSTM/VLDM were used.
18618 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18619 possible, which allows use of appropriate alignment tags.
18620 Note that the choice of "64" is independent of the actual vector
18621 element size; this size simply ensures that the behavior is
18622 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18624 Due to limitations of those instructions, use of VST1.64/VLD1.64
18625 is not possible if:
18626 - the address contains PRE_DEC, or
18627 - the mode refers to more than 4 double-word registers
18629 In those cases, it would be possible to replace VSTM/VLDM by a
18630 sequence of instructions; this is not currently implemented since
18631 this is not certain to actually improve performance. */
18633 const char *
18634 output_move_neon (rtx *operands)
18636 rtx reg, mem, addr, ops[2];
18637 int regno, nregs, load = REG_P (operands[0]);
18638 const char *templ;
18639 char buff[50];
18640 machine_mode mode;
18642 reg = operands[!load];
18643 mem = operands[load];
18645 mode = GET_MODE (reg);
18647 gcc_assert (REG_P (reg));
18648 regno = REGNO (reg);
18649 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18650 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18651 || NEON_REGNO_OK_FOR_QUAD (regno));
18652 gcc_assert (VALID_NEON_DREG_MODE (mode)
18653 || VALID_NEON_QREG_MODE (mode)
18654 || VALID_NEON_STRUCT_MODE (mode));
18655 gcc_assert (MEM_P (mem));
18657 addr = XEXP (mem, 0);
18659 /* Strip off const from addresses like (const (plus (...))). */
18660 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18661 addr = XEXP (addr, 0);
18663 switch (GET_CODE (addr))
18665 case POST_INC:
18666 /* We have to use vldm / vstm for too-large modes. */
18667 if (nregs > 4)
18669 templ = "v%smia%%?\t%%0!, %%h1";
18670 ops[0] = XEXP (addr, 0);
18672 else
18674 templ = "v%s1.64\t%%h1, %%A0";
18675 ops[0] = mem;
18677 ops[1] = reg;
18678 break;
18680 case PRE_DEC:
18681 /* We have to use vldm / vstm in this case, since there is no
18682 pre-decrement form of the vld1 / vst1 instructions. */
18683 templ = "v%smdb%%?\t%%0!, %%h1";
18684 ops[0] = XEXP (addr, 0);
18685 ops[1] = reg;
18686 break;
18688 case POST_MODIFY:
18689 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18690 gcc_unreachable ();
18692 case REG:
18693 /* We have to use vldm / vstm for too-large modes. */
18694 if (nregs > 1)
18696 if (nregs > 4)
18697 templ = "v%smia%%?\t%%m0, %%h1";
18698 else
18699 templ = "v%s1.64\t%%h1, %%A0";
18701 ops[0] = mem;
18702 ops[1] = reg;
18703 break;
18705 /* Fall through. */
18706 case LABEL_REF:
18707 case PLUS:
18709 int i;
18710 int overlap = -1;
18711 for (i = 0; i < nregs; i++)
18713 /* We're only using DImode here because it's a convenient size. */
18714 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18715 ops[1] = adjust_address (mem, DImode, 8 * i);
18716 if (reg_overlap_mentioned_p (ops[0], mem))
18718 gcc_assert (overlap == -1);
18719 overlap = i;
18721 else
18723 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18724 output_asm_insn (buff, ops);
18727 if (overlap != -1)
18729 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18730 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18731 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18732 output_asm_insn (buff, ops);
18735 return "";
18738 default:
18739 gcc_unreachable ();
18742 sprintf (buff, templ, load ? "ld" : "st");
18743 output_asm_insn (buff, ops);
18745 return "";
18748 /* Compute and return the length of neon_mov<mode>, where <mode> is
18749 one of VSTRUCT modes: EI, OI, CI or XI. */
18751 arm_attr_length_move_neon (rtx_insn *insn)
18753 rtx reg, mem, addr;
18754 int load;
18755 machine_mode mode;
18757 extract_insn_cached (insn);
18759 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18761 mode = GET_MODE (recog_data.operand[0]);
18762 switch (mode)
18764 case EImode:
18765 case OImode:
18766 return 8;
18767 case CImode:
18768 return 12;
18769 case XImode:
18770 return 16;
18771 default:
18772 gcc_unreachable ();
18776 load = REG_P (recog_data.operand[0]);
18777 reg = recog_data.operand[!load];
18778 mem = recog_data.operand[load];
18780 gcc_assert (MEM_P (mem));
18782 mode = GET_MODE (reg);
18783 addr = XEXP (mem, 0);
18785 /* Strip off const from addresses like (const (plus (...))). */
18786 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18787 addr = XEXP (addr, 0);
18789 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18791 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18792 return insns * 4;
18794 else
18795 return 4;
18798 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18799 return zero. */
18802 arm_address_offset_is_imm (rtx_insn *insn)
18804 rtx mem, addr;
18806 extract_insn_cached (insn);
18808 if (REG_P (recog_data.operand[0]))
18809 return 0;
18811 mem = recog_data.operand[0];
18813 gcc_assert (MEM_P (mem));
18815 addr = XEXP (mem, 0);
18817 if (REG_P (addr)
18818 || (GET_CODE (addr) == PLUS
18819 && REG_P (XEXP (addr, 0))
18820 && CONST_INT_P (XEXP (addr, 1))))
18821 return 1;
18822 else
18823 return 0;
18826 /* Output an ADD r, s, #n where n may be too big for one instruction.
18827 If the addend is zero and the destination register is the same as the source, output nothing. */
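/* Illustrative example: #0x10203 is not a valid ARM immediate, so an add
   of that constant is split into a chain of three adds, of #3, then
   #0x200, then #0x10000 (each one an encodable 8-bit rotated value).  */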
18828 const char *
18829 output_add_immediate (rtx *operands)
18831 HOST_WIDE_INT n = INTVAL (operands[2]);
18833 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18835 if (n < 0)
18836 output_multi_immediate (operands,
18837 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18838 -n);
18839 else
18840 output_multi_immediate (operands,
18841 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18845 return "";
18848 /* Output a multiple immediate operation.
18849 OPERANDS is the vector of operands referred to in the output patterns.
18850 INSTR1 is the output pattern to use for the first constant.
18851 INSTR2 is the output pattern to use for subsequent constants.
18852 IMMED_OP is the index of the constant slot in OPERANDS.
18853 N is the constant value. */
18854 static const char *
18855 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18856 int immed_op, HOST_WIDE_INT n)
18858 #if HOST_BITS_PER_WIDE_INT > 32
18859 n &= 0xffffffff;
18860 #endif
18862 if (n == 0)
18864 /* Quick and easy output. */
18865 operands[immed_op] = const0_rtx;
18866 output_asm_insn (instr1, operands);
18868 else
18870 int i;
18871 const char * instr = instr1;
18873 /* Note that n is never zero here (which would give no output). */
18874 for (i = 0; i < 32; i += 2)
18876 if (n & (3 << i))
18878 operands[immed_op] = GEN_INT (n & (255 << i));
18879 output_asm_insn (instr, operands);
18880 instr = instr2;
18881 i += 6;
18886 return "";
18889 /* Return the name of a shifter operation. */
18890 static const char *
18891 arm_shift_nmem(enum rtx_code code)
18893 switch (code)
18895 case ASHIFT:
18896 return ARM_LSL_NAME;
18898 case ASHIFTRT:
18899 return "asr";
18901 case LSHIFTRT:
18902 return "lsr";
18904 case ROTATERT:
18905 return "ror";
18907 default:
18908 abort();
18912 /* Return the appropriate ARM instruction for the operation code.
18913 The returned result should not be overwritten. OP is the rtx of the
18914 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18915 was shifted. */
18916 const char *
18917 arithmetic_instr (rtx op, int shift_first_arg)
18919 switch (GET_CODE (op))
18921 case PLUS:
18922 return "add";
18924 case MINUS:
18925 return shift_first_arg ? "rsb" : "sub";
18927 case IOR:
18928 return "orr";
18930 case XOR:
18931 return "eor";
18933 case AND:
18934 return "and";
18936 case ASHIFT:
18937 case ASHIFTRT:
18938 case LSHIFTRT:
18939 case ROTATERT:
18940 return arm_shift_nmem(GET_CODE(op));
18942 default:
18943 gcc_unreachable ();
18947 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18948 for the operation code. The returned result should not be overwritten.
18949 OP is the rtx of the shift.
18950 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18951 constant shift amount otherwise. */
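/* Illustrative examples: (mult x 8) is printed as "lsl" with *AMOUNTP set
   to 3, and (rotate x 8) is canonicalized to "ror" with *AMOUNTP set to
   24 (i.e. 32 - 8).  */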
18952 static const char *
18953 shift_op (rtx op, HOST_WIDE_INT *amountp)
18955 const char * mnem;
18956 enum rtx_code code = GET_CODE (op);
18958 switch (code)
18960 case ROTATE:
18961 if (!CONST_INT_P (XEXP (op, 1)))
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18967 code = ROTATERT;
18968 *amountp = 32 - INTVAL (XEXP (op, 1));
18969 mnem = "ror";
18970 break;
18972 case ASHIFT:
18973 case ASHIFTRT:
18974 case LSHIFTRT:
18975 case ROTATERT:
18976 mnem = arm_shift_nmem(code);
18977 if (CONST_INT_P (XEXP (op, 1)))
18979 *amountp = INTVAL (XEXP (op, 1));
18981 else if (REG_P (XEXP (op, 1)))
18983 *amountp = -1;
18984 return mnem;
18986 else
18988 output_operand_lossage ("invalid shift operand");
18989 return NULL;
18991 break;
18993 case MULT:
18994 /* We never have to worry about the amount being other than a
18995 power of 2, since this case can never be reloaded from a reg. */
18996 if (!CONST_INT_P (XEXP (op, 1)))
18998 output_operand_lossage ("invalid shift operand");
18999 return NULL;
19002 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19004 /* Amount must be a power of two. */
19005 if (*amountp & (*amountp - 1))
19007 output_operand_lossage ("invalid shift operand");
19008 return NULL;
19011 *amountp = int_log2 (*amountp);
19012 return ARM_LSL_NAME;
19014 default:
19015 output_operand_lossage ("invalid shift operand");
19016 return NULL;
19019 /* This is not 100% correct, but follows from the desire to merge
19020 multiplication by a power of 2 with the recognizer for a
19021 shift. >=32 is not a valid shift for "lsl", so we must try to
19022 output a shift that produces the correct arithmetical result.
19023 Using lsr #32 is identical except for the fact that the carry bit
19024 is not set correctly if we set the flags; but we never use the
19025 carry bit from such an operation, so we can ignore that. */
19026 if (code == ROTATERT)
19027 /* Rotate is just modulo 32. */
19028 *amountp &= 31;
19029 else if (*amountp != (*amountp & 31))
19031 if (code == ASHIFT)
19032 mnem = "lsr";
19033 *amountp = 32;
19036 /* Shifts of 0 are no-ops. */
19037 if (*amountp == 0)
19038 return NULL;
19040 return mnem;
19043 /* Obtain the shift count from POWER, which must be a power of two. */
19045 static HOST_WIDE_INT
19046 int_log2 (HOST_WIDE_INT power)
19048 HOST_WIDE_INT shift = 0;
19050 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19052 gcc_assert (shift <= 31);
19053 shift++;
19056 return shift;
19059 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19060 because /bin/as is horribly restrictive. The judgement about
19061 whether or not each character is 'printable' (and can be output as
19062 is) or not (and must be printed with an octal escape) must be made
19063 with reference to the *host* character set -- the situation is
19064 similar to that discussed in the comments above pp_c_char in
19065 c-pretty-print.c. */
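/* Illustrative example: the bytes 'a', '"', 'b', '\n' come out as
       .ascii  "a\"b\012"
   with a new .ascii directive started once MAX_ASCII_LEN characters have
   been emitted on the current line.  */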
19067 #define MAX_ASCII_LEN 51
19069 void
19070 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19072 int i;
19073 int len_so_far = 0;
19075 fputs ("\t.ascii\t\"", stream);
19077 for (i = 0; i < len; i++)
19079 int c = p[i];
19081 if (len_so_far >= MAX_ASCII_LEN)
19083 fputs ("\"\n\t.ascii\t\"", stream);
19084 len_so_far = 0;
19087 if (ISPRINT (c))
19089 if (c == '\\' || c == '\"')
19091 putc ('\\', stream);
19092 len_so_far++;
19094 putc (c, stream);
19095 len_so_far++;
19097 else
19099 fprintf (stream, "\\%03o", c);
19100 len_so_far += 4;
19104 fputs ("\"\n", stream);
19107 /* Whether a register is callee saved or not. This is necessary because high
19108 registers are marked as caller saved when optimizing for size on Thumb-1
19109 targets, despite being callee saved, in order to discourage their use. */
19110 #define callee_saved_reg_p(reg) \
19111 (!call_used_regs[reg] \
19112 || (TARGET_THUMB1 && optimize_size \
19113 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19115 /* Compute the register save mask for registers 0 through 12
19116 inclusive. This code is used by arm_compute_save_reg_mask. */
19118 static unsigned long
19119 arm_compute_save_reg0_reg12_mask (void)
19121 unsigned long func_type = arm_current_func_type ();
19122 unsigned long save_reg_mask = 0;
19123 unsigned int reg;
19125 if (IS_INTERRUPT (func_type))
19127 unsigned int max_reg;
19128 /* Interrupt functions must not corrupt any registers,
19129 even call clobbered ones. If this is a leaf function
19130 we can just examine the registers used by the RTL, but
19131 otherwise we have to assume that whatever function is
19132 called might clobber anything, and so we have to save
19133 all the call-clobbered registers as well. */
19134 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19135 /* FIQ handlers have registers r8 - r12 banked, so
19136 we only need to check r0 - r7. Normal ISRs only
19137 bank r14 and r15, so we must check up to r12.
19138 r13 is the stack pointer which is always preserved,
19139 so we do not need to consider it here. */
19140 max_reg = 7;
19141 else
19142 max_reg = 12;
19144 for (reg = 0; reg <= max_reg; reg++)
19145 if (df_regs_ever_live_p (reg)
19146 || (! crtl->is_leaf && call_used_regs[reg]))
19147 save_reg_mask |= (1 << reg);
19149 /* Also save the pic base register if necessary. */
19150 if (flag_pic
19151 && !TARGET_SINGLE_PIC_BASE
19152 && arm_pic_register != INVALID_REGNUM
19153 && crtl->uses_pic_offset_table)
19154 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19156 else if (IS_VOLATILE(func_type))
19158 /* For noreturn functions we historically omitted register saves
19159 altogether. However this really messes up debugging. As a
19160 compromise save just the frame pointers. Combined with the link
19161 register saved elsewhere this should be sufficient to get
19162 a backtrace. */
19163 if (frame_pointer_needed)
19164 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19165 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19166 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19167 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19168 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19170 else
19172 /* In the normal case we only need to save those registers
19173 which are call saved and which are used by this function. */
19174 for (reg = 0; reg <= 11; reg++)
19175 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19176 save_reg_mask |= (1 << reg);
19178 /* Handle the frame pointer as a special case. */
19179 if (frame_pointer_needed)
19180 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19182 /* If we aren't loading the PIC register,
19183 don't stack it even though it may be live. */
19184 if (flag_pic
19185 && !TARGET_SINGLE_PIC_BASE
19186 && arm_pic_register != INVALID_REGNUM
19187 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19188 || crtl->uses_pic_offset_table))
19189 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19191 /* The prologue will copy SP into R0, so save it. */
19192 if (IS_STACKALIGN (func_type))
19193 save_reg_mask |= 1;
19196 /* Save registers so the exception handler can modify them. */
19197 if (crtl->calls_eh_return)
19199 unsigned int i;
19201 for (i = 0; ; i++)
19203 reg = EH_RETURN_DATA_REGNO (i);
19204 if (reg == INVALID_REGNUM)
19205 break;
19206 save_reg_mask |= 1 << reg;
19210 return save_reg_mask;
19213 /* Return true if r3 is live at the start of the function. */
19215 static bool
19216 arm_r3_live_at_start_p (void)
19218 /* Just look at cfg info, which is still close enough to correct at this
19219 point. This gives false positives for broken functions that might use
19220 uninitialized data that happens to be allocated in r3, but who cares? */
19221 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19224 /* Compute the number of bytes used to store the static chain register on the
19225 stack, above the stack frame. We need to know this accurately to get the
19226 alignment of the rest of the stack frame correct. */
19228 static int
19229 arm_compute_static_chain_stack_bytes (void)
19231 /* See the defining assertion in arm_expand_prologue. */
19232 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19233 && IS_NESTED (arm_current_func_type ())
19234 && arm_r3_live_at_start_p ()
19235 && crtl->args.pretend_args_size == 0)
19236 return 4;
19238 return 0;
19241 /* Compute a bit mask of which registers need to be
19242 saved on the stack for the current function.
19243 This is used by arm_get_frame_offsets, which may add extra registers. */
19245 static unsigned long
19246 arm_compute_save_reg_mask (void)
19248 unsigned int save_reg_mask = 0;
19249 unsigned long func_type = arm_current_func_type ();
19250 unsigned int reg;
19252 if (IS_NAKED (func_type))
19253 /* This should never really happen. */
19254 return 0;
19256 /* If we are creating a stack frame, then we must save the frame pointer,
19257 IP (which will hold the old stack pointer), LR and the PC. */
19258 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19259 save_reg_mask |=
19260 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19261 | (1 << IP_REGNUM)
19262 | (1 << LR_REGNUM)
19263 | (1 << PC_REGNUM);
19265 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19267 /* Decide if we need to save the link register.
19268 Interrupt routines have their own banked link register,
19269 so they never need to save it.
19270 Otherwise if we do not use the link register we do not need to save
19271 it. If we are pushing other registers onto the stack however, we
19272 can save an instruction in the epilogue by pushing the link register
19273 now and then popping it back into the PC. This incurs extra memory
19274 accesses though, so we only do it when optimizing for size, and only
19275 if we know that we will not need a fancy return sequence. */
19276 if (df_regs_ever_live_p (LR_REGNUM)
19277 || (save_reg_mask
19278 && optimize_size
19279 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19280 && !crtl->tail_call_emit
19281 && !crtl->calls_eh_return))
19282 save_reg_mask |= 1 << LR_REGNUM;
19284 if (cfun->machine->lr_save_eliminated)
19285 save_reg_mask &= ~ (1 << LR_REGNUM);
19287 if (TARGET_REALLY_IWMMXT
19288 && ((bit_count (save_reg_mask)
19289 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19290 arm_compute_static_chain_stack_bytes())
19291 ) % 2) != 0)
19293 /* The total number of registers that are going to be pushed
19294 onto the stack is odd. We need to ensure that the stack
19295 is 64-bit aligned before we start to save iWMMXt registers,
19296 and also before we start to create locals. (A local variable
19297 might be a double or long long which we will load/store using
19298 an iWMMXt instruction). Therefore we need to push another
19299 ARM register, so that the stack will be 64-bit aligned. We
19300 try to avoid using the arg registers (r0 - r3) as they might be
19301 used to pass values in a tail call. */
19302 for (reg = 4; reg <= 12; reg++)
19303 if ((save_reg_mask & (1 << reg)) == 0)
19304 break;
19306 if (reg <= 12)
19307 save_reg_mask |= (1 << reg);
19308 else
19310 cfun->machine->sibcall_blocked = 1;
19311 save_reg_mask |= (1 << 3);
19315 /* We may need to push an additional register for use initializing the
19316 PIC base register. */
19317 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19318 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19320 reg = thumb_find_work_register (1 << 4);
19321 if (!call_used_regs[reg])
19322 save_reg_mask |= (1 << reg);
19325 return save_reg_mask;
19329 /* Compute a bit mask of which registers need to be
19330 saved on the stack for the current function. */
19331 static unsigned long
19332 thumb1_compute_save_reg_mask (void)
19334 unsigned long mask;
19335 unsigned reg;
19337 mask = 0;
19338 for (reg = 0; reg < 12; reg ++)
19339 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19340 mask |= 1 << reg;
19342 if (flag_pic
19343 && !TARGET_SINGLE_PIC_BASE
19344 && arm_pic_register != INVALID_REGNUM
19345 && crtl->uses_pic_offset_table)
19346 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19348 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19349 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19350 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19352 /* LR will also be pushed if any lo regs are pushed. */
19353 if (mask & 0xff || thumb_force_lr_save ())
19354 mask |= (1 << LR_REGNUM);
19356 /* Make sure we have a low work register if we need one.
19357 We will need one if we are going to push a high register,
19358 but we are not currently intending to push a low register. */
19359 if ((mask & 0xff) == 0
19360 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19362 /* Use thumb_find_work_register to choose which register
19363 we will use. If the register is live then we will
19364 have to push it. Use LAST_LO_REGNUM as our fallback
19365 choice for the register to select. */
19366 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19367 /* Make sure the register returned by thumb_find_work_register is
19368 not part of the return value. */
19369 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19370 reg = LAST_LO_REGNUM;
19372 if (callee_saved_reg_p (reg))
19373 mask |= 1 << reg;
19376 /* The 504 below is 8 bytes less than 512 because there are two possible
19377 alignment words. We can't tell here if they will be present or not so we
19378 have to play it safe and assume that they are. */
19379 if ((CALLER_INTERWORKING_SLOT_SIZE +
19380 ROUND_UP_WORD (get_frame_size ()) +
19381 crtl->outgoing_args_size) >= 504)
19383 /* This is the same as the code in thumb1_expand_prologue() which
19384 determines which register to use for stack decrement. */
19385 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19386 if (mask & (1 << reg))
19387 break;
19389 if (reg > LAST_LO_REGNUM)
19391 /* Make sure we have a register available for stack decrement. */
19392 mask |= 1 << LAST_LO_REGNUM;
19396 return mask;
19400 /* Return the number of bytes required to save VFP registers. */
19401 static int
19402 arm_get_vfp_saved_size (void)
19404 unsigned int regno;
19405 int count;
19406 int saved;
19408 saved = 0;
19409 /* Space for saved VFP registers. */
19410 if (TARGET_HARD_FLOAT && TARGET_VFP)
19412 count = 0;
19413 for (regno = FIRST_VFP_REGNUM;
19414 regno < LAST_VFP_REGNUM;
19415 regno += 2)
19417 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19418 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19420 if (count > 0)
19422 /* Work around the ARM10 VFPr1 bug. */
19423 if (count == 2 && !arm_arch6)
19424 count++;
19425 saved += count * 8;
19427 count = 0;
19429 else
19430 count++;
19432 if (count > 0)
19434 if (count == 2 && !arm_arch6)
19435 count++;
19436 saved += count * 8;
19439 return saved;
19443 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19444 everything bar the final return instruction. If simple_return is true,
19445 then do not output the epilogue, because it has already been emitted in RTL. */
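/* Illustrative output for a function that saved r4, r5 and lr:
       ldmfd  sp!, {r4, r5, pc}
   (or "pop {r4, r5, pc}" with unified syntax); interrupt handlers get a
   trailing "^" so the CPSR is restored along with the PC.  */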
19446 const char *
19447 output_return_instruction (rtx operand, bool really_return, bool reverse,
19448 bool simple_return)
19450 char conditional[10];
19451 char instr[100];
19452 unsigned reg;
19453 unsigned long live_regs_mask;
19454 unsigned long func_type;
19455 arm_stack_offsets *offsets;
19457 func_type = arm_current_func_type ();
19459 if (IS_NAKED (func_type))
19460 return "";
19462 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19464 /* If this function was declared non-returning, and we have
19465 found a tail call, then we have to trust that the called
19466 function won't return. */
19467 if (really_return)
19469 rtx ops[2];
19471 /* Otherwise, trap an attempted return by aborting. */
19472 ops[0] = operand;
19473 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19474 : "abort");
19475 assemble_external_libcall (ops[1]);
19476 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19479 return "";
19482 gcc_assert (!cfun->calls_alloca || really_return);
19484 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19486 cfun->machine->return_used_this_function = 1;
19488 offsets = arm_get_frame_offsets ();
19489 live_regs_mask = offsets->saved_regs_mask;
19491 if (!simple_return && live_regs_mask)
19493 const char * return_reg;
19495 /* If we do not have any special requirements for function exit
19496 (e.g. interworking) then we can load the return address
19497 directly into the PC. Otherwise we must load it into LR. */
19498 if (really_return
19499 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19500 return_reg = reg_names[PC_REGNUM];
19501 else
19502 return_reg = reg_names[LR_REGNUM];
19504 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19506 /* There are three possible reasons for the IP register
19507 being saved. 1) a stack frame was created, in which case
19508 IP contains the old stack pointer, or 2) an ISR routine
19509 corrupted it, or 3) it was saved to align the stack on
19510 iWMMXt. In case 1, restore IP into SP, otherwise just
19511 restore IP. */
19512 if (frame_pointer_needed)
19514 live_regs_mask &= ~ (1 << IP_REGNUM);
19515 live_regs_mask |= (1 << SP_REGNUM);
19517 else
19518 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19521 /* On some ARM architectures it is faster to use LDR rather than
19522 LDM to load a single register. On other architectures, the
19523 cost is the same. In 26 bit mode, or for exception handlers,
19524 we have to use LDM to load the PC so that the CPSR is also
19525 restored. */
19526 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19527 if (live_regs_mask == (1U << reg))
19528 break;
19530 if (reg <= LAST_ARM_REGNUM
19531 && (reg != LR_REGNUM
19532 || ! really_return
19533 || ! IS_INTERRUPT (func_type)))
19535 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19536 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19538 else
19540 char *p;
19541 int first = 1;
19543 /* Generate the load multiple instruction to restore the
19544 registers. Note we can get here, even if
19545 frame_pointer_needed is true, but only if sp already
19546 points to the base of the saved core registers. */
19547 if (live_regs_mask & (1 << SP_REGNUM))
19549 unsigned HOST_WIDE_INT stack_adjust;
19551 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19552 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19554 if (stack_adjust && arm_arch5 && TARGET_ARM)
19555 if (TARGET_UNIFIED_ASM)
19556 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19557 else
19558 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19559 else
19561 /* If we can't use ldmib (SA110 bug),
19562 then try to pop r3 instead. */
19563 if (stack_adjust)
19564 live_regs_mask |= 1 << 3;
19566 if (TARGET_UNIFIED_ASM)
19567 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19568 else
19569 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19572 else
19573 if (TARGET_UNIFIED_ASM)
19574 sprintf (instr, "pop%s\t{", conditional);
19575 else
19576 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19578 p = instr + strlen (instr);
19580 for (reg = 0; reg <= SP_REGNUM; reg++)
19581 if (live_regs_mask & (1 << reg))
19583 int l = strlen (reg_names[reg]);
19585 if (first)
19586 first = 0;
19587 else
19589 memcpy (p, ", ", 2);
19590 p += 2;
19593 memcpy (p, "%|", 2);
19594 memcpy (p + 2, reg_names[reg], l);
19595 p += l + 2;
19598 if (live_regs_mask & (1 << LR_REGNUM))
19600 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19601 /* If returning from an interrupt, restore the CPSR. */
19602 if (IS_INTERRUPT (func_type))
19603 strcat (p, "^");
19605 else
19606 strcpy (p, "}");
19609 output_asm_insn (instr, & operand);
19611 /* See if we need to generate an extra instruction to
19612 perform the actual function return. */
19613 if (really_return
19614 && func_type != ARM_FT_INTERWORKED
19615 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19617 /* The return has already been handled
19618 by loading the LR into the PC. */
19619 return "";
19623 if (really_return)
19625 switch ((int) ARM_FUNC_TYPE (func_type))
19627 case ARM_FT_ISR:
19628 case ARM_FT_FIQ:
19629 /* ??? This is wrong for unified assembly syntax. */
19630 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19631 break;
19633 case ARM_FT_INTERWORKED:
19634 sprintf (instr, "bx%s\t%%|lr", conditional);
19635 break;
19637 case ARM_FT_EXCEPTION:
19638 /* ??? This is wrong for unified assembly syntax. */
19639 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19640 break;
19642 default:
19643 /* Use bx if it's available. */
19644 if (arm_arch5 || arm_arch4t)
19645 sprintf (instr, "bx%s\t%%|lr", conditional);
19646 else
19647 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19648 break;
19651 output_asm_insn (instr, & operand);
19654 return "";
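/* Illustrative output (assuming unified syntax, ARM mode, no interworking
   and no interrupt handling): a function that saved {r4, r5, lr} returns
   with a single

       pop     {r4, r5, pc}

   ("ldmfd sp!, {r4, r5, pc}" with divided syntax), while a function with
   no saved registers on an architecture providing BX simply emits
   "bx lr".  This is only a sketch of two common paths through the
   function above, not a description of every case.  */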
19657 /* Write the function name into the code section, directly preceding
19658 the function prologue.
19660 Code will be output similar to this:
19662 .ascii "arm_poke_function_name", 0
19663 .align
19665 .word 0xff000000 + (t1 - t0)
19666 arm_poke_function_name
19667 mov ip, sp
19668 stmfd sp!, {fp, ip, lr, pc}
19669 sub fp, ip, #4
19671 When performing a stack backtrace, code can inspect the value
19672 of 'pc' stored at 'fp' + 0. If the trace function then looks
19673 at location pc - 12 and the top 8 bits are set, then we know
19674 that there is a function name embedded immediately preceding this
19675 location, whose length is ((pc[-3]) & 0x00ffffff).
19677 We assume that pc is declared as a pointer to an unsigned long.
19679 It is of no benefit to output the function name if we are assembling
19680 a leaf function. These function types will not contain a stack
19681 backtrace structure, therefore it is not possible to determine the
19682 function name. */
19683 void
19684 arm_poke_function_name (FILE *stream, const char *name)
19686 unsigned long alignlength;
19687 unsigned long length;
19688 rtx x;
19690 length = strlen (name) + 1;
19691 alignlength = ROUND_UP_WORD (length);
19693 ASM_OUTPUT_ASCII (stream, name, length);
19694 ASM_OUTPUT_ALIGN (stream, 2);
19695 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19696 assemble_aligned_integer (UNITS_PER_WORD, x);
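/* Sketch of how a backtrace walker might consume the marker emitted above
   (an illustration only; the exact frame layout is APCS-specific and the
   variable names are hypothetical).  Here PC is the unsigned long pointer
   described in the comment above:

     unsigned long marker = pc[-3];                  -- the word at pc - 12
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & 0x00ffffff;    -- padded name length
         const char *name = (const char *) (pc - 3) - len;
         -- NAME now points at the NUL-terminated function name.
       }
*/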
19699 /* Place some comments into the assembler stream
19700 describing the current function. */
19701 static void
19702 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19704 unsigned long func_type;
19706 /* ??? Do we want to print some of the below anyway? */
19707 if (TARGET_THUMB1)
19708 return;
19710 /* Sanity check. */
19711 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19713 func_type = arm_current_func_type ();
19715 switch ((int) ARM_FUNC_TYPE (func_type))
19717 default:
19718 case ARM_FT_NORMAL:
19719 break;
19720 case ARM_FT_INTERWORKED:
19721 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19722 break;
19723 case ARM_FT_ISR:
19724 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19725 break;
19726 case ARM_FT_FIQ:
19727 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19728 break;
19729 case ARM_FT_EXCEPTION:
19730 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19731 break;
19734 if (IS_NAKED (func_type))
19735 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19737 if (IS_VOLATILE (func_type))
19738 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19740 if (IS_NESTED (func_type))
19741 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19742 if (IS_STACKALIGN (func_type))
19743 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19745 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19746 crtl->args.size,
19747 crtl->args.pretend_args_size, frame_size);
19749 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19750 frame_pointer_needed,
19751 cfun->machine->uses_anonymous_args);
19753 if (cfun->machine->lr_save_eliminated)
19754 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19756 if (crtl->calls_eh_return)
19757 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19761 static void
19762 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19763 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19765 arm_stack_offsets *offsets;
19767 if (TARGET_THUMB1)
19769 int regno;
19771 /* Emit any call-via-reg trampolines that are needed for v4t support
19772 of call_reg and call_value_reg type insns. */
19773 for (regno = 0; regno < LR_REGNUM; regno++)
19775 rtx label = cfun->machine->call_via[regno];
19777 if (label != NULL)
19779 switch_to_section (function_section (current_function_decl));
19780 targetm.asm_out.internal_label (asm_out_file, "L",
19781 CODE_LABEL_NUMBER (label));
19782 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19786 /* ??? Probably not safe to set this here, since it assumes that a
19787 function will be emitted as assembly immediately after we generate
19788 RTL for it. This does not happen for inline functions. */
19789 cfun->machine->return_used_this_function = 0;
19791 else /* TARGET_32BIT */
19793 /* We need to take into account any stack-frame rounding. */
19794 offsets = arm_get_frame_offsets ();
19796 gcc_assert (!use_return_insn (FALSE, NULL)
19797 || (cfun->machine->return_used_this_function != 0)
19798 || offsets->saved_regs == offsets->outgoing_args
19799 || frame_pointer_needed);
19803 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19804 STR and STRD. If an even number of registers are being pushed, one
19805 or more STRD patterns are created for each register pair. If an
19806 odd number of registers are pushed, emit an initial STR followed by
19807 as many STRD instructions as are needed. This works best when the
19808 stack is initially 64-bit aligned (the normal case), since it
19809 ensures that each STRD is also 64-bit aligned. */
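/* Illustrative sequence (assuming saved_regs_mask covers {r4, r5, r6}):

       str     r4, [sp, #-12]!      @ odd count: single store allocates all 12 bytes
       strd    r5, r6, [sp, #4]

   With an even count such as {r4, r5, r6, r7} the first STRD itself
   performs the full stack decrement and the remaining STRDs use plain
   offset addressing.  This is a sketch of the intended output, not an
   exact listing.  */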
19810 static void
19811 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19813 int num_regs = 0;
19814 int i;
19815 int regno;
19816 rtx par = NULL_RTX;
19817 rtx dwarf = NULL_RTX;
19818 rtx tmp;
19819 bool first = true;
19821 num_regs = bit_count (saved_regs_mask);
19823 /* Must be at least one register to save, and can't save SP or PC. */
19824 gcc_assert (num_regs > 0 && num_regs <= 14);
19825 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19826 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19828 /* Create sequence for DWARF info. All the frame-related data for
19829 debugging is held in this wrapper. */
19830 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19832 /* Describe the stack adjustment. */
19833 tmp = gen_rtx_SET (VOIDmode,
19834 stack_pointer_rtx,
19835 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (dwarf, 0, 0) = tmp;
19839 /* Find the first register. */
19840 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19843 i = 0;
19845 /* If there's an odd number of registers to push, start off by
19846 pushing a single register. This ensures that subsequent strd
19847 operations are dword aligned (assuming that SP was originally
19848 64-bit aligned). */
19849 if ((num_regs & 1) != 0)
19851 rtx reg, mem, insn;
19853 reg = gen_rtx_REG (SImode, regno);
19854 if (num_regs == 1)
19855 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19856 stack_pointer_rtx));
19857 else
19858 mem = gen_frame_mem (Pmode,
19859 gen_rtx_PRE_MODIFY
19860 (Pmode, stack_pointer_rtx,
19861 plus_constant (Pmode, stack_pointer_rtx,
19862 -4 * num_regs)));
19864 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19865 RTX_FRAME_RELATED_P (tmp) = 1;
19866 insn = emit_insn (tmp);
19867 RTX_FRAME_RELATED_P (insn) = 1;
19868 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19869 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19870 reg);
19871 RTX_FRAME_RELATED_P (tmp) = 1;
19872 i++;
19873 regno++;
19874 XVECEXP (dwarf, 0, i) = tmp;
19875 first = false;
19878 while (i < num_regs)
19879 if (saved_regs_mask & (1 << regno))
19881 rtx reg1, reg2, mem1, mem2;
19882 rtx tmp0, tmp1, tmp2;
19883 int regno2;
19885 /* Find the register to pair with this one. */
19886 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19887 regno2++)
19890 reg1 = gen_rtx_REG (SImode, regno);
19891 reg2 = gen_rtx_REG (SImode, regno2);
19893 if (first)
19895 rtx insn;
19897 first = false;
19898 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19899 stack_pointer_rtx,
19900 -4 * num_regs));
19901 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19902 stack_pointer_rtx,
19903 -4 * (num_regs - 1)));
19904 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19905 plus_constant (Pmode, stack_pointer_rtx,
19906 -4 * (num_regs)));
19907 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19908 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19909 RTX_FRAME_RELATED_P (tmp0) = 1;
19910 RTX_FRAME_RELATED_P (tmp1) = 1;
19911 RTX_FRAME_RELATED_P (tmp2) = 1;
19912 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19913 XVECEXP (par, 0, 0) = tmp0;
19914 XVECEXP (par, 0, 1) = tmp1;
19915 XVECEXP (par, 0, 2) = tmp2;
19916 insn = emit_insn (par);
19917 RTX_FRAME_RELATED_P (insn) = 1;
19918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19920 else
19922 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19923 stack_pointer_rtx,
19924 4 * i));
19925 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19926 stack_pointer_rtx,
19927 4 * (i + 1)));
19928 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19929 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19930 RTX_FRAME_RELATED_P (tmp1) = 1;
19931 RTX_FRAME_RELATED_P (tmp2) = 1;
19932 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19933 XVECEXP (par, 0, 0) = tmp1;
19934 XVECEXP (par, 0, 1) = tmp2;
19935 emit_insn (par);
19938 /* Create unwind information. This is an approximation. */
19939 tmp1 = gen_rtx_SET (VOIDmode,
19940 gen_frame_mem (Pmode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 4 * i)),
19944 reg1);
19945 tmp2 = gen_rtx_SET (VOIDmode,
19946 gen_frame_mem (Pmode,
19947 plus_constant (Pmode,
19948 stack_pointer_rtx,
19949 4 * (i + 1))),
19950 reg2);
19952 RTX_FRAME_RELATED_P (tmp1) = 1;
19953 RTX_FRAME_RELATED_P (tmp2) = 1;
19954 XVECEXP (dwarf, 0, i + 1) = tmp1;
19955 XVECEXP (dwarf, 0, i + 2) = tmp2;
19956 i += 2;
19957 regno = regno2 + 1;
19959 else
19960 regno++;
19962 return;
19965 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19966 whenever possible, otherwise it emits single-word stores. The first store
19967 also allocates stack space for all saved registers, using writeback with
19968 pre-indexed addressing. All other stores use offset addressing. If no STRD
19969 can be emitted, this function emits a sequence of single-word stores,
19970 and not an STM as before, because single-word stores provide more freedom
19971 in scheduling and can be turned into an STM by peephole optimizations. */
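/* Illustrative sequence (assuming saved_regs_mask covers {r4, r5, r7}):

       strd    r4, r5, [sp, #-12]!  @ first store allocates all stack space
       str     r7, [sp, #8]

   r7 cannot be paired because r6 is not saved, so it falls back to a
   single-word store at its offset.  This is a sketch of the intended
   output, not an exact listing.  */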
19972 static void
19973 arm_emit_strd_push (unsigned long saved_regs_mask)
19975 int num_regs = 0;
19976 int i, j, dwarf_index = 0;
19977 int offset = 0;
19978 rtx dwarf = NULL_RTX;
19979 rtx insn = NULL_RTX;
19980 rtx tmp, mem;
19982 /* TODO: More efficient code could be emitted by changing the
19983 layout, e.g., first push all pairs that can use STRD to keep the
19984 stack aligned, and then push all other registers. */
19985 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19986 if (saved_regs_mask & (1 << i))
19987 num_regs++;
19989 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19990 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19991 gcc_assert (num_regs > 0);
19993 /* Create sequence for DWARF info. */
19994 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19996 /* For dwarf info, we generate explicit stack update. */
19997 tmp = gen_rtx_SET (VOIDmode,
19998 stack_pointer_rtx,
19999 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20000 RTX_FRAME_RELATED_P (tmp) = 1;
20001 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20003 /* Save registers. */
20004 offset = - 4 * num_regs;
20005 j = 0;
20006 while (j <= LAST_ARM_REGNUM)
20007 if (saved_regs_mask & (1 << j))
20009 if ((j % 2 == 0)
20010 && (saved_regs_mask & (1 << (j + 1))))
20012 /* Current register and next register form a register pair for
20013 which STRD can be generated. */
20014 if (offset < 0)
20016 /* Allocate stack space for all saved registers. */
20017 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20018 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20019 mem = gen_frame_mem (DImode, tmp);
20020 offset = 0;
20022 else if (offset > 0)
20023 mem = gen_frame_mem (DImode,
20024 plus_constant (Pmode,
20025 stack_pointer_rtx,
20026 offset));
20027 else
20028 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20030 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
20031 RTX_FRAME_RELATED_P (tmp) = 1;
20032 tmp = emit_insn (tmp);
20034 /* Record the first store insn. */
20035 if (dwarf_index == 1)
20036 insn = tmp;
20038 /* Generate dwarf info. */
20039 mem = gen_frame_mem (SImode,
20040 plus_constant (Pmode,
20041 stack_pointer_rtx,
20042 offset));
20043 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20044 RTX_FRAME_RELATED_P (tmp) = 1;
20045 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20047 mem = gen_frame_mem (SImode,
20048 plus_constant (Pmode,
20049 stack_pointer_rtx,
20050 offset + 4));
20051 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20052 RTX_FRAME_RELATED_P (tmp) = 1;
20053 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20055 offset += 8;
20056 j += 2;
20058 else
20060 /* Emit a single word store. */
20061 if (offset < 0)
20063 /* Allocate stack space for all saved registers. */
20064 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20065 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20066 mem = gen_frame_mem (SImode, tmp);
20067 offset = 0;
20069 else if (offset > 0)
20070 mem = gen_frame_mem (SImode,
20071 plus_constant (Pmode,
20072 stack_pointer_rtx,
20073 offset));
20074 else
20075 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20077 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20078 RTX_FRAME_RELATED_P (tmp) = 1;
20079 tmp = emit_insn (tmp);
20081 /* Record the first store insn. */
20082 if (dwarf_index == 1)
20083 insn = tmp;
20085 /* Generate dwarf info. */
20086 mem = gen_frame_mem (SImode,
20087 plus_constant(Pmode,
20088 stack_pointer_rtx,
20089 offset));
20090 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20091 RTX_FRAME_RELATED_P (tmp) = 1;
20092 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20094 offset += 4;
20095 j += 1;
20098 else
20099 j++;
20101 /* Attach dwarf info to the first insn we generate. */
20102 gcc_assert (insn != NULL_RTX);
20103 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20104 RTX_FRAME_RELATED_P (insn) = 1;
20107 /* Generate and emit an insn that we will recognize as a push_multi.
20108 Unfortunately, since this insn does not reflect very well the actual
20109 semantics of the operation, we need to annotate the insn for the benefit
20110 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20111 MASK for registers that should be annotated for DWARF2 frame unwind
20112 information. */
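/* Illustrative use (the mask values are assumed): a call such as

       emit_multi_reg_push ((1 << 4) | (1 << 5) | (1 << LR_REGNUM),
                            (1 << 4) | (1 << 5) | (1 << LR_REGNUM));

   is ultimately output as a single store-multiple ("push {r4, r5, lr}" or
   "stmfd sp!, {r4, r5, lr}" depending on syntax), while the attached
   REG_FRAME_RELATED_EXPR note describes the same effect as one stack
   decrement plus three individual stores, as explained inside the
   function below.  */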
20113 static rtx
20114 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20116 int num_regs = 0;
20117 int num_dwarf_regs = 0;
20118 int i, j;
20119 rtx par;
20120 rtx dwarf;
20121 int dwarf_par_index;
20122 rtx tmp, reg;
20124 /* We don't record the PC in the dwarf frame information. */
20125 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20127 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20129 if (mask & (1 << i))
20130 num_regs++;
20131 if (dwarf_regs_mask & (1 << i))
20132 num_dwarf_regs++;
20135 gcc_assert (num_regs && num_regs <= 16);
20136 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20138 /* For the body of the insn we are going to generate an UNSPEC in
20139 parallel with several USEs. This allows the insn to be recognized
20140 by the push_multi pattern in the arm.md file.
20142 The body of the insn looks something like this:
20144 (parallel [
20145 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20146 (const_int:SI <num>)))
20147 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20148 (use (reg:SI XX))
20149 (use (reg:SI YY))
20153 For the frame note however, we try to be more explicit and actually
20154 show each register being stored into the stack frame, plus a (single)
20155 decrement of the stack pointer. We do it this way in order to be
20156 friendly to the stack unwinding code, which only wants to see a single
20157 stack decrement per instruction. The RTL we generate for the note looks
20158 something like this:
20160 (sequence [
20161 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20162 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20163 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20164 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20168 FIXME: In an ideal world the PRE_MODIFY would not exist and
20169 instead we'd have a parallel expression detailing all
20170 the stores to the various memory addresses so that debug
20171 information is more up-to-date. Remember however while writing
20172 this to take care of the constraints with the push instruction.
20174 Note also that this has to be taken care of for the VFP registers.
20176 For more see PR43399. */
20178 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20179 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20180 dwarf_par_index = 1;
20182 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20184 if (mask & (1 << i))
20186 reg = gen_rtx_REG (SImode, i);
20188 XVECEXP (par, 0, 0)
20189 = gen_rtx_SET (VOIDmode,
20190 gen_frame_mem
20191 (BLKmode,
20192 gen_rtx_PRE_MODIFY (Pmode,
20193 stack_pointer_rtx,
20194 plus_constant
20195 (Pmode, stack_pointer_rtx,
20196 -4 * num_regs))
20198 gen_rtx_UNSPEC (BLKmode,
20199 gen_rtvec (1, reg),
20200 UNSPEC_PUSH_MULT));
20202 if (dwarf_regs_mask & (1 << i))
20204 tmp = gen_rtx_SET (VOIDmode,
20205 gen_frame_mem (SImode, stack_pointer_rtx),
20206 reg);
20207 RTX_FRAME_RELATED_P (tmp) = 1;
20208 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20211 break;
20215 for (j = 1, i++; j < num_regs; i++)
20217 if (mask & (1 << i))
20219 reg = gen_rtx_REG (SImode, i);
20221 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20223 if (dwarf_regs_mask & (1 << i))
20226 = gen_rtx_SET (VOIDmode,
20227 gen_frame_mem
20228 (SImode,
20229 plus_constant (Pmode, stack_pointer_rtx,
20230 4 * j)),
20231 reg);
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20236 j++;
20240 par = emit_insn (par);
20242 tmp = gen_rtx_SET (VOIDmode,
20243 stack_pointer_rtx,
20244 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20245 RTX_FRAME_RELATED_P (tmp) = 1;
20246 XVECEXP (dwarf, 0, 0) = tmp;
20248 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20250 return par;
20253 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20254 SIZE is the offset to be adjusted.
20255 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20256 static void
20257 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20259 rtx dwarf;
20261 RTX_FRAME_RELATED_P (insn) = 1;
20262 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20263 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20266 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20267 SAVED_REGS_MASK shows which registers need to be restored.
20269 Unfortunately, since this insn does not reflect very well the actual
20270 semantics of the operation, we need to annotate the insn for the benefit
20271 of DWARF2 frame unwind information. */
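/* Illustrative behaviour (assuming SAVED_REGS_MASK covers {r4, r5, r6, lr}):
   a single "pop {r4, r5, r6, lr}" is emitted, together with REG_CFA_RESTORE
   notes for each register and a REG_CFA_ADJUST_CFA note recording the
   16-byte stack adjustment.  If PC is in the mask the pop is emitted as a
   jump insn and the CFA-adjust note is omitted, since the function returns
   there.  This is a sketch, not an exhaustive description.  */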
20272 static void
20273 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20275 int num_regs = 0;
20276 int i, j;
20277 rtx par;
20278 rtx dwarf = NULL_RTX;
20279 rtx tmp, reg;
20280 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20281 int offset_adj;
20282 int emit_update;
20284 offset_adj = return_in_pc ? 1 : 0;
20285 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20286 if (saved_regs_mask & (1 << i))
20287 num_regs++;
20289 gcc_assert (num_regs && num_regs <= 16);
20291 /* If SP is in reglist, then we don't emit SP update insn. */
20292 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20294 /* The parallel needs to hold num_regs SETs
20295 and one SET for the stack update. */
20296 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20298 if (return_in_pc)
20299 XVECEXP (par, 0, 0) = ret_rtx;
20301 if (emit_update)
20303 /* Increment the stack pointer, based on there being
20304 num_regs 4-byte registers to restore. */
20305 tmp = gen_rtx_SET (VOIDmode,
20306 stack_pointer_rtx,
20307 plus_constant (Pmode,
20308 stack_pointer_rtx,
20309 4 * num_regs));
20310 RTX_FRAME_RELATED_P (tmp) = 1;
20311 XVECEXP (par, 0, offset_adj) = tmp;
20314 /* Now restore every reg, which may include PC. */
20315 for (j = 0, i = 0; j < num_regs; i++)
20316 if (saved_regs_mask & (1 << i))
20318 reg = gen_rtx_REG (SImode, i);
20319 if ((num_regs == 1) && emit_update && !return_in_pc)
20321 /* Emit single load with writeback. */
20322 tmp = gen_frame_mem (SImode,
20323 gen_rtx_POST_INC (Pmode,
20324 stack_pointer_rtx));
20325 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20326 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20327 return;
20330 tmp = gen_rtx_SET (VOIDmode,
20331 reg,
20332 gen_frame_mem
20333 (SImode,
20334 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20335 RTX_FRAME_RELATED_P (tmp) = 1;
20336 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20338 /* We need to maintain a sequence for DWARF info too. As dwarf info
20339 should not have PC, skip PC. */
20340 if (i != PC_REGNUM)
20341 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20343 j++;
20346 if (return_in_pc)
20347 par = emit_jump_insn (par);
20348 else
20349 par = emit_insn (par);
20351 REG_NOTES (par) = dwarf;
20352 if (!return_in_pc)
20353 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20354 stack_pointer_rtx, stack_pointer_rtx);
20357 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20358 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20360 Unfortunately, since this insn does not reflect very well the actual
20361 semantics of the operation, we need to annotate the insn for the benefit
20362 of DWARF2 frame unwind information. */
20363 static void
20364 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20366 int i, j;
20367 rtx par;
20368 rtx dwarf = NULL_RTX;
20369 rtx tmp, reg;
20371 gcc_assert (num_regs && num_regs <= 32);
20373 /* Workaround ARM10 VFPr1 bug. */
20374 if (num_regs == 2 && !arm_arch6)
20376 if (first_reg == 15)
20377 first_reg--;
20379 num_regs++;
20382 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20383 there could be up to 32 D-registers to restore.
20384 If there are more than 16 D-registers, make two recursive calls,
20385 each of which emits one pop_multi instruction. */
20386 if (num_regs > 16)
20388 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20389 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20390 return;
20393 /* The parallel needs to hold num_regs SETs
20394 and one SET for the stack update. */
20395 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20397 /* Increment the stack pointer, based on there being
20398 num_regs 8-byte registers to restore. */
20399 tmp = gen_rtx_SET (VOIDmode,
20400 base_reg,
20401 plus_constant (Pmode, base_reg, 8 * num_regs));
20402 RTX_FRAME_RELATED_P (tmp) = 1;
20403 XVECEXP (par, 0, 0) = tmp;
20405 /* Now show every reg that will be restored, using a SET for each. */
20406 for (j = 0, i=first_reg; j < num_regs; i += 2)
20408 reg = gen_rtx_REG (DFmode, i);
20410 tmp = gen_rtx_SET (VOIDmode,
20411 reg,
20412 gen_frame_mem
20413 (DFmode,
20414 plus_constant (Pmode, base_reg, 8 * j)));
20415 RTX_FRAME_RELATED_P (tmp) = 1;
20416 XVECEXP (par, 0, j + 1) = tmp;
20418 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20420 j++;
20423 par = emit_insn (par);
20424 REG_NOTES (par) = dwarf;
20426 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20427 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20429 RTX_FRAME_RELATED_P (par) = 1;
20430 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20432 else
20433 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20434 base_reg, base_reg);
20437 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20438 even number of registers is being popped, multiple LDRD patterns are created
20439 for all register pairs. If an odd number of registers is popped, the last
20440 register is loaded using an LDR pattern. */
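/* Illustrative sequence (assuming {r4, r5, r6, r7, pc} are popped):

       ldrd    r4, r5, [sp]
       ldrd    r6, r7, [sp, #8]
       add     sp, sp, #16
       ldr     pc, [sp], #4        @ the final load doubles as the return

   Thumb-2 LDRD does not require consecutive target registers, so any
   pairing of the popped registers would do; this is a sketch of the
   intended output, not an exact listing.  */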
20441 static void
20442 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20444 int num_regs = 0;
20445 int i, j;
20446 rtx par = NULL_RTX;
20447 rtx dwarf = NULL_RTX;
20448 rtx tmp, reg, tmp1;
20449 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20451 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20452 if (saved_regs_mask & (1 << i))
20453 num_regs++;
20455 gcc_assert (num_regs && num_regs <= 16);
20457 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20458 to be popped. So, if num_regs is even, now it will become odd,
20459 and we can generate pop with PC. If num_regs is odd, it will be
20460 even now, and ldr with return can be generated for PC. */
20461 if (return_in_pc)
20462 num_regs--;
20464 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20466 /* Var j iterates over all the registers to gather those set in
20467 saved_regs_mask. Var i gives the index of each saved register in the stack frame.
20468 A PARALLEL RTX of a register pair is created here, so that the pattern for
20469 LDRD can be matched. As PC is always the last register to be popped, and
20470 we have already decremented num_regs if PC is present, we don't have to worry
20471 about PC in this loop. */
20472 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20473 if (saved_regs_mask & (1 << j))
20475 /* Create RTX for memory load. */
20476 reg = gen_rtx_REG (SImode, j);
20477 tmp = gen_rtx_SET (SImode,
20478 reg,
20479 gen_frame_mem (SImode,
20480 plus_constant (Pmode,
20481 stack_pointer_rtx, 4 * i)));
20482 RTX_FRAME_RELATED_P (tmp) = 1;
20484 if (i % 2 == 0)
20486 /* When saved-register index (i) is even, the RTX to be emitted is
20487 yet to be created. Hence create it first. The LDRD pattern we
20488 are generating is:
20489 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20490 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20491 where target registers need not be consecutive. */
20492 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20493 dwarf = NULL_RTX;
20496 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20497 added as 0th element and if i is odd, reg_i is added as 1st element
20498 of LDRD pattern shown above. */
20499 XVECEXP (par, 0, (i % 2)) = tmp;
20500 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20502 if ((i % 2) == 1)
20504 /* When saved-register index (i) is odd, RTXs for both the registers
20505 to be loaded are generated in above given LDRD pattern, and the
20506 pattern can be emitted now. */
20507 par = emit_insn (par);
20508 REG_NOTES (par) = dwarf;
20509 RTX_FRAME_RELATED_P (par) = 1;
20512 i++;
20515 /* If the number of registers pushed is odd and return_in_pc is false, or the
20516 number of registers is even and return_in_pc is true, the last register is
20517 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20518 then use LDR with post-increment. */
20520 /* Increment the stack pointer, based on there being
20521 num_regs 4-byte registers to restore. */
20522 tmp = gen_rtx_SET (VOIDmode,
20523 stack_pointer_rtx,
20524 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20525 RTX_FRAME_RELATED_P (tmp) = 1;
20526 tmp = emit_insn (tmp);
20527 if (!return_in_pc)
20529 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20530 stack_pointer_rtx, stack_pointer_rtx);
20533 dwarf = NULL_RTX;
20535 if (((num_regs % 2) == 1 && !return_in_pc)
20536 || ((num_regs % 2) == 0 && return_in_pc))
20538 /* Scan for the single register to be popped. Skip until the saved
20539 register is found. */
20540 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20542 /* Gen LDR with post increment here. */
20543 tmp1 = gen_rtx_MEM (SImode,
20544 gen_rtx_POST_INC (SImode,
20545 stack_pointer_rtx));
20546 set_mem_alias_set (tmp1, get_frame_alias_set ());
20548 reg = gen_rtx_REG (SImode, j);
20549 tmp = gen_rtx_SET (SImode, reg, tmp1);
20550 RTX_FRAME_RELATED_P (tmp) = 1;
20551 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20553 if (return_in_pc)
20555 /* If return_in_pc, j must be PC_REGNUM. */
20556 gcc_assert (j == PC_REGNUM);
20557 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20558 XVECEXP (par, 0, 0) = ret_rtx;
20559 XVECEXP (par, 0, 1) = tmp;
20560 par = emit_jump_insn (par);
20562 else
20564 par = emit_insn (tmp);
20565 REG_NOTES (par) = dwarf;
20566 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20567 stack_pointer_rtx, stack_pointer_rtx);
20571 else if ((num_regs % 2) == 1 && return_in_pc)
20573 /* There are 2 registers to be popped. So, generate the pattern
20574 pop_multiple_with_stack_update_and_return to pop in PC. */
20575 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20578 return;
20581 /* LDRD in ARM mode needs consecutive registers as operands. This function
20582 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20583 offset addressing and then generates one separate stack update. This provides
20584 more scheduling freedom, compared to writeback on every load. However,
20585 if the function returns using load into PC directly
20586 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20587 before the last load. TODO: Add a peephole optimization to recognize
20588 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20589 peephole optimization to merge the load at stack-offset zero
20590 with the stack update instruction using load with writeback
20591 in post-index addressing mode. */
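/* Illustrative sequence (assuming SAVED_REGS_MASK covers {r4, r5, r6, pc}):

       ldrd    r4, r5, [sp]
       ldr     r6, [sp, #8]
       add     sp, sp, #12
       ldr     pc, [sp], #4        @ return via the PC load

   r6 cannot be paired (r7 is not saved) and PC is never used in an LDRD,
   so both take the single-word path.  A sketch only, not an exact
   listing.  */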
20592 static void
20593 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20595 int j = 0;
20596 int offset = 0;
20597 rtx par = NULL_RTX;
20598 rtx dwarf = NULL_RTX;
20599 rtx tmp, mem;
20601 /* Restore saved registers. */
20602 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20603 j = 0;
20604 while (j <= LAST_ARM_REGNUM)
20605 if (saved_regs_mask & (1 << j))
20607 if ((j % 2) == 0
20608 && (saved_regs_mask & (1 << (j + 1)))
20609 && (j + 1) != PC_REGNUM)
20611 /* Current register and next register form register pair for which
20612 LDRD can be generated. PC is always the last register popped, and
20613 we handle it separately. */
20614 if (offset > 0)
20615 mem = gen_frame_mem (DImode,
20616 plus_constant (Pmode,
20617 stack_pointer_rtx,
20618 offset));
20619 else
20620 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20622 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20623 tmp = emit_insn (tmp);
20624 RTX_FRAME_RELATED_P (tmp) = 1;
20626 /* Generate dwarf info. */
20628 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20629 gen_rtx_REG (SImode, j),
20630 NULL_RTX);
20631 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20632 gen_rtx_REG (SImode, j + 1),
20633 dwarf);
20635 REG_NOTES (tmp) = dwarf;
20637 offset += 8;
20638 j += 2;
20640 else if (j != PC_REGNUM)
20642 /* Emit a single word load. */
20643 if (offset > 0)
20644 mem = gen_frame_mem (SImode,
20645 plus_constant (Pmode,
20646 stack_pointer_rtx,
20647 offset));
20648 else
20649 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20651 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20652 tmp = emit_insn (tmp);
20653 RTX_FRAME_RELATED_P (tmp) = 1;
20655 /* Generate dwarf info. */
20656 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20657 gen_rtx_REG (SImode, j),
20658 NULL_RTX);
20660 offset += 4;
20661 j += 1;
20663 else /* j == PC_REGNUM */
20664 j++;
20666 else
20667 j++;
20669 /* Update the stack. */
20670 if (offset > 0)
20672 tmp = gen_rtx_SET (Pmode,
20673 stack_pointer_rtx,
20674 plus_constant (Pmode,
20675 stack_pointer_rtx,
20676 offset));
20677 tmp = emit_insn (tmp);
20678 arm_add_cfa_adjust_cfa_note (tmp, offset,
20679 stack_pointer_rtx, stack_pointer_rtx);
20680 offset = 0;
20683 if (saved_regs_mask & (1 << PC_REGNUM))
20685 /* Only PC is to be popped. */
20686 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20687 XVECEXP (par, 0, 0) = ret_rtx;
20688 tmp = gen_rtx_SET (SImode,
20689 gen_rtx_REG (SImode, PC_REGNUM),
20690 gen_frame_mem (SImode,
20691 gen_rtx_POST_INC (SImode,
20692 stack_pointer_rtx)));
20693 RTX_FRAME_RELATED_P (tmp) = 1;
20694 XVECEXP (par, 0, 1) = tmp;
20695 par = emit_jump_insn (par);
20697 /* Generate dwarf info. */
20698 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20699 gen_rtx_REG (SImode, PC_REGNUM),
20700 NULL_RTX);
20701 REG_NOTES (par) = dwarf;
20702 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20703 stack_pointer_rtx, stack_pointer_rtx);
20707 /* Calculate the size of the return value that is passed in registers. */
20708 static unsigned
20709 arm_size_return_regs (void)
20711 machine_mode mode;
20713 if (crtl->return_rtx != 0)
20714 mode = GET_MODE (crtl->return_rtx);
20715 else
20716 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20718 return GET_MODE_SIZE (mode);
20721 /* Return true if the current function needs to save/restore LR. */
20722 static bool
20723 thumb_force_lr_save (void)
20725 return !cfun->machine->lr_save_eliminated
20726 && (!leaf_function_p ()
20727 || thumb_far_jump_used_p ()
20728 || df_regs_ever_live_p (LR_REGNUM));
20731 /* We do not know if r3 will be available because
20732 there is an indirect tail call happening in this
20733 particular case. */
20734 static bool
20735 is_indirect_tailcall_p (rtx call)
20737 rtx pat = PATTERN (call);
20739 /* Indirect tail call. */
20740 pat = XVECEXP (pat, 0, 0);
20741 if (GET_CODE (pat) == SET)
20742 pat = SET_SRC (pat);
20744 pat = XEXP (XEXP (pat, 0), 0);
20745 return REG_P (pat);
20748 /* Return true if r3 is used by any of the tail call insns in the
20749 current function. */
20750 static bool
20751 any_sibcall_could_use_r3 (void)
20753 edge_iterator ei;
20754 edge e;
20756 if (!crtl->tail_call_emit)
20757 return false;
20758 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20759 if (e->flags & EDGE_SIBCALL)
20761 rtx call = BB_END (e->src);
20762 if (!CALL_P (call))
20763 call = prev_nonnote_nondebug_insn (call);
20764 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20765 if (find_regno_fusage (call, USE, 3)
20766 || is_indirect_tailcall_p (call))
20767 return true;
20769 return false;
20773 /* Compute the distance from register FROM to register TO.
20774 These can be the arg pointer (26), the soft frame pointer (25),
20775 the stack pointer (13) or the hard frame pointer (11).
20776 In thumb mode r7 is used as the soft frame pointer, if needed.
20777 Typical stack layout looks like this:
20779 old stack pointer -> | |
20780 ----
20781 | | \
20782 | | saved arguments for
20783 | | vararg functions
20784 | | /
20786 hard FP & arg pointer -> | | \
20787 | | stack
20788 | | frame
20789 | | /
20791 | | \
20792 | | call saved
20793 | | registers
20794 soft frame pointer -> | | /
20796 | | \
20797 | | local
20798 | | variables
20799 locals base pointer -> | | /
20801 | | \
20802 | | outgoing
20803 | | arguments
20804 current stack pointer -> | | /
20807 For a given function some or all of these stack components
20808 may not be needed, giving rise to the possibility of
20809 eliminating some of the registers.
20811 The values returned by this function must reflect the behavior
20812 of arm_expand_prologue() and arm_compute_save_reg_mask().
20814 The sign of the number returned reflects the direction of stack
20815 growth, so the values are positive for all eliminations except
20816 from the soft frame pointer to the hard frame pointer.
20818 SFP may point just inside the local variables block to ensure correct
20819 alignment. */
20822 /* Calculate stack offsets. These are used to calculate register elimination
20823 offsets and in prologue/epilogue code. Also calculates which registers
20824 should be saved. */
20826 static arm_stack_offsets *
20827 arm_get_frame_offsets (void)
20829 struct arm_stack_offsets *offsets;
20830 unsigned long func_type;
20831 int leaf;
20832 int saved;
20833 int core_saved;
20834 HOST_WIDE_INT frame_size;
20835 int i;
20837 offsets = &cfun->machine->stack_offsets;
20839 /* We need to know if we are a leaf function. Unfortunately, it
20840 is possible to be called after start_sequence has been called,
20841 which causes get_insns to return the insns for the sequence,
20842 not the function, which will cause leaf_function_p to return
20843 the incorrect result.
20845 We only need to know about leaf functions once reload has completed, and the
20846 frame size cannot be changed after that time, so we can safely
20847 use the cached value. */
20849 if (reload_completed)
20850 return offsets;
20852 /* Initially this is the size of the local variables. It will be translated
20853 into an offset once we have determined the size of preceding data. */
20854 frame_size = ROUND_UP_WORD (get_frame_size ());
20856 leaf = leaf_function_p ();
20858 /* Space for variadic functions. */
20859 offsets->saved_args = crtl->args.pretend_args_size;
20861 /* In Thumb mode this is incorrect, but never used. */
20862 offsets->frame
20863 = (offsets->saved_args
20864 + arm_compute_static_chain_stack_bytes ()
20865 + (frame_pointer_needed ? 4 : 0));
20867 if (TARGET_32BIT)
20869 unsigned int regno;
20871 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20872 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20873 saved = core_saved;
20875 /* We know that SP will be doubleword aligned on entry, and we must
20876 preserve that condition at any subroutine call. We also require the
20877 soft frame pointer to be doubleword aligned. */
20879 if (TARGET_REALLY_IWMMXT)
20881 /* Check for the call-saved iWMMXt registers. */
20882 for (regno = FIRST_IWMMXT_REGNUM;
20883 regno <= LAST_IWMMXT_REGNUM;
20884 regno++)
20885 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20886 saved += 8;
20889 func_type = arm_current_func_type ();
20890 /* Space for saved VFP registers. */
20891 if (! IS_VOLATILE (func_type)
20892 && TARGET_HARD_FLOAT && TARGET_VFP)
20893 saved += arm_get_vfp_saved_size ();
20895 else /* TARGET_THUMB1 */
20897 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20898 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20899 saved = core_saved;
20900 if (TARGET_BACKTRACE)
20901 saved += 16;
20904 /* Saved registers include the stack frame. */
20905 offsets->saved_regs
20906 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20907 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20909 /* A leaf function does not need any stack alignment if it has nothing
20910 on the stack. */
20911 if (leaf && frame_size == 0
20912 /* However if it calls alloca(), we have a dynamically allocated
20913 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20914 && ! cfun->calls_alloca)
20916 offsets->outgoing_args = offsets->soft_frame;
20917 offsets->locals_base = offsets->soft_frame;
20918 return offsets;
20921 /* Ensure SFP has the correct alignment. */
20922 if (ARM_DOUBLEWORD_ALIGN
20923 && (offsets->soft_frame & 7))
20925 offsets->soft_frame += 4;
20926 /* Try to align stack by pushing an extra reg. Don't bother doing this
20927 when there is a stack frame as the alignment will be rolled into
20928 the normal stack adjustment. */
20929 if (frame_size + crtl->outgoing_args_size == 0)
20931 int reg = -1;
20933 /* Register r3 is caller-saved. Normally it does not need to be
20934 saved on entry by the prologue. However if we choose to save
20935 it for padding then we may confuse the compiler into thinking
20936 a prologue sequence is required when in fact it is not. This
20937 will occur when shrink-wrapping if r3 is used as a scratch
20938 register and there are no other callee-saved writes.
20940 This situation can be avoided when other callee-saved registers
20941 are available and r3 is not mandatory if we choose a callee-saved
20942 register for padding. */
20943 bool prefer_callee_reg_p = false;
20945 /* If it is safe to use r3, then do so. This sometimes
20946 generates better code on Thumb-2 by avoiding the need to
20947 use 32-bit push/pop instructions. */
20948 if (! any_sibcall_could_use_r3 ()
20949 && arm_size_return_regs () <= 12
20950 && (offsets->saved_regs_mask & (1 << 3)) == 0
20951 && (TARGET_THUMB2
20952 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20954 reg = 3;
20955 if (!TARGET_THUMB2)
20956 prefer_callee_reg_p = true;
20958 if (reg == -1
20959 || prefer_callee_reg_p)
20961 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20963 /* Avoid fixed registers; they may be changed at
20964 arbitrary times so it's unsafe to restore them
20965 during the epilogue. */
20966 if (!fixed_regs[i]
20967 && (offsets->saved_regs_mask & (1 << i)) == 0)
20969 reg = i;
20970 break;
20975 if (reg != -1)
20977 offsets->saved_regs += 4;
20978 offsets->saved_regs_mask |= (1 << reg);
20983 offsets->locals_base = offsets->soft_frame + frame_size;
20984 offsets->outgoing_args = (offsets->locals_base
20985 + crtl->outgoing_args_size);
20987 if (ARM_DOUBLEWORD_ALIGN)
20989 /* Ensure SP remains doubleword aligned. */
20990 if (offsets->outgoing_args & 7)
20991 offsets->outgoing_args += 4;
20992 gcc_assert (!(offsets->outgoing_args & 7));
20995 return offsets;
20999 /* Calculate the relative offsets for the different stack pointers. Positive
21000 offsets are in the direction of stack growth. */
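/* Worked example (the offsets are assumed figures): if arm_get_frame_offsets
   returns saved_args = 0, soft_frame = 20 and outgoing_args = 36, then
   eliminating FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   36 - 20 = 16, and eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 36 - (0 + 4) = 32, matching the switch
   cases below.  */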
21002 HOST_WIDE_INT
21003 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21005 arm_stack_offsets *offsets;
21007 offsets = arm_get_frame_offsets ();
21009 /* OK, now we have enough information to compute the distances.
21010 There must be an entry in these switch tables for each pair
21011 of registers in ELIMINABLE_REGS, even if some of the entries
21012 seem to be redundant or useless. */
21013 switch (from)
21015 case ARG_POINTER_REGNUM:
21016 switch (to)
21018 case THUMB_HARD_FRAME_POINTER_REGNUM:
21019 return 0;
21021 case FRAME_POINTER_REGNUM:
21022 /* This is the reverse of the soft frame pointer
21023 to hard frame pointer elimination below. */
21024 return offsets->soft_frame - offsets->saved_args;
21026 case ARM_HARD_FRAME_POINTER_REGNUM:
21027 /* This is only non-zero in the case where the static chain register
21028 is stored above the frame. */
21029 return offsets->frame - offsets->saved_args - 4;
21031 case STACK_POINTER_REGNUM:
21032 /* If nothing has been pushed on the stack at all
21033 then this will return -4. This *is* correct! */
21034 return offsets->outgoing_args - (offsets->saved_args + 4);
21036 default:
21037 gcc_unreachable ();
21039 gcc_unreachable ();
21041 case FRAME_POINTER_REGNUM:
21042 switch (to)
21044 case THUMB_HARD_FRAME_POINTER_REGNUM:
21045 return 0;
21047 case ARM_HARD_FRAME_POINTER_REGNUM:
21048 /* The hard frame pointer points to the top entry in the
21049 stack frame. The soft frame pointer to the bottom entry
21050 in the stack frame. If there is no stack frame at all,
21051 then they are identical. */
21053 return offsets->frame - offsets->soft_frame;
21055 case STACK_POINTER_REGNUM:
21056 return offsets->outgoing_args - offsets->soft_frame;
21058 default:
21059 gcc_unreachable ();
21061 gcc_unreachable ();
21063 default:
21064 /* You cannot eliminate from the stack pointer.
21065 In theory you could eliminate from the hard frame
21066 pointer to the stack pointer, but this will never
21067 happen, since if a stack frame is not needed the
21068 hard frame pointer will never be used. */
21069 gcc_unreachable ();
21073 /* Given FROM and TO register numbers, say whether this elimination is
21074 allowed. Frame pointer elimination is automatically handled.
21076 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21077 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21078 pointer, we must eliminate FRAME_POINTER_REGNUM into
21079 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21080 ARG_POINTER_REGNUM. */
21082 bool
21083 arm_can_eliminate (const int from, const int to)
21085 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21086 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21087 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21088 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21089 true);
21092 /* Emit RTL to save coprocessor registers on function entry. Returns the
21093 number of bytes pushed. */
21095 static int
21096 arm_save_coproc_regs(void)
21098 int saved_size = 0;
21099 unsigned reg;
21100 unsigned start_reg;
21101 rtx insn;
21103 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21104 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21106 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21107 insn = gen_rtx_MEM (V2SImode, insn);
21108 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21109 RTX_FRAME_RELATED_P (insn) = 1;
21110 saved_size += 8;
21113 if (TARGET_HARD_FLOAT && TARGET_VFP)
21115 start_reg = FIRST_VFP_REGNUM;
21117 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21119 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21120 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21122 if (start_reg != reg)
21123 saved_size += vfp_emit_fstmd (start_reg,
21124 (reg - start_reg) / 2);
21125 start_reg = reg + 2;
21128 if (start_reg != reg)
21129 saved_size += vfp_emit_fstmd (start_reg,
21130 (reg - start_reg) / 2);
21132 return saved_size;
21136 /* Set the Thumb frame pointer from the stack pointer. */
21138 static void
21139 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21141 HOST_WIDE_INT amount;
21142 rtx insn, dwarf;
21144 amount = offsets->outgoing_args - offsets->locals_base;
21145 if (amount < 1024)
21146 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21147 stack_pointer_rtx, GEN_INT (amount)));
21148 else
21150 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21151 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21152 expects the first two operands to be the same. */
21153 if (TARGET_THUMB2)
21155 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21156 stack_pointer_rtx,
21157 hard_frame_pointer_rtx));
21159 else
21161 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21162 hard_frame_pointer_rtx,
21163 stack_pointer_rtx));
21165 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21166 plus_constant (Pmode, stack_pointer_rtx, amount));
21167 RTX_FRAME_RELATED_P (dwarf) = 1;
21168 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21171 RTX_FRAME_RELATED_P (insn) = 1;
21174 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21175 function. */
21176 void
21177 arm_expand_prologue (void)
21179 rtx amount;
21180 rtx insn;
21181 rtx ip_rtx;
21182 unsigned long live_regs_mask;
21183 unsigned long func_type;
21184 int fp_offset = 0;
21185 int saved_pretend_args = 0;
21186 int saved_regs = 0;
21187 unsigned HOST_WIDE_INT args_to_push;
21188 arm_stack_offsets *offsets;
21190 func_type = arm_current_func_type ();
21192 /* Naked functions don't have prologues. */
21193 if (IS_NAKED (func_type))
21194 return;
21196 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21197 args_to_push = crtl->args.pretend_args_size;
21199 /* Compute which register we will have to save onto the stack. */
21200 offsets = arm_get_frame_offsets ();
21201 live_regs_mask = offsets->saved_regs_mask;
21203 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21205 if (IS_STACKALIGN (func_type))
21207 rtx r0, r1;
21209 /* Handle a word-aligned stack pointer. We generate the following:
21211 mov r0, sp
21212 bic r1, r0, #7
21213 mov sp, r1
21214 <save and restore r0 in normal prologue/epilogue>
21215 mov sp, r0
21216 bx lr
21218 The unwinder doesn't need to know about the stack realignment.
21219 Just tell it we saved SP in r0. */
21220 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21222 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21223 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21225 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21226 RTX_FRAME_RELATED_P (insn) = 1;
21227 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21229 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21231 /* ??? The CFA changes here, which may cause GDB to conclude that it
21232 has entered a different function. That said, the unwind info is
21233 correct, individually, before and after this instruction because
21234 we've described the save of SP, which will override the default
21235 handling of SP as restoring from the CFA. */
21236 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21239 /* For APCS frames, if the IP register is clobbered
21240 when creating the frame, save that register in a special
21241 way. */
21242 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21244 if (IS_INTERRUPT (func_type))
21246 /* Interrupt functions must not corrupt any registers.
21247 Creating a frame pointer however, corrupts the IP
21248 register, so we must push it first. */
21249 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21251 /* Do not set RTX_FRAME_RELATED_P on this insn.
21252 The dwarf stack unwinding code only wants to see one
21253 stack decrement per function, and this is not it. If
21254 this instruction is labeled as being part of the frame
21255 creation sequence then dwarf2out_frame_debug_expr will
21256 die when it encounters the assignment of IP to FP
21257 later on, since the use of SP here establishes SP as
21258 the CFA register and not IP.
21260 Anyway this instruction is not really part of the stack
21261 frame creation although it is part of the prologue. */
21263 else if (IS_NESTED (func_type))
21265 /* The static chain register is the same as the IP register
21266 used as a scratch register during stack frame creation.
21267 To get around this, we need to find somewhere to store IP
21268 whilst the frame is being created. We try the following
21269 places in order:
21271 1. The last argument register r3 if it is available.
21272 2. A slot on the stack above the frame if there are no
21273 arguments to push onto the stack.
21274 3. Register r3 again, after pushing the argument registers
21275 onto the stack, if this is a varargs function.
21276 4. The last slot on the stack created for the arguments to
21277 push, if this isn't a varargs function.
21279 Note - we only need to tell the dwarf2 backend about the SP
21280 adjustment in the second variant; the static chain register
21281 doesn't need to be unwound, as it doesn't contain a value
21282 inherited from the caller. */
21284 if (!arm_r3_live_at_start_p ())
21285 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21286 else if (args_to_push == 0)
21288 rtx addr, dwarf;
21290 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21291 saved_regs += 4;
21293 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21294 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21295 fp_offset = 4;
21297 /* Just tell the dwarf backend that we adjusted SP. */
21298 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21299 plus_constant (Pmode, stack_pointer_rtx,
21300 -fp_offset));
21301 RTX_FRAME_RELATED_P (insn) = 1;
21302 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21304 else
21306 /* Store the args on the stack. */
21307 if (cfun->machine->uses_anonymous_args)
21309 insn
21310 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21311 (0xf0 >> (args_to_push / 4)) & 0xf);
21312 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21313 saved_pretend_args = 1;
21315 else
21317 rtx addr, dwarf;
21319 if (args_to_push == 4)
21320 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21321 else
21322 addr
21323 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21324 plus_constant (Pmode,
21325 stack_pointer_rtx,
21326 -args_to_push));
21328 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21330 /* Just tell the dwarf backend that we adjusted SP. */
21331 dwarf
21332 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21333 plus_constant (Pmode, stack_pointer_rtx,
21334 -args_to_push));
21335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21338 RTX_FRAME_RELATED_P (insn) = 1;
21339 fp_offset = args_to_push;
21340 args_to_push = 0;
21344 insn = emit_set_insn (ip_rtx,
21345 plus_constant (Pmode, stack_pointer_rtx,
21346 fp_offset));
21347 RTX_FRAME_RELATED_P (insn) = 1;
21350 if (args_to_push)
21352 /* Push the argument registers, or reserve space for them. */
21353 if (cfun->machine->uses_anonymous_args)
21354 insn = emit_multi_reg_push
21355 ((0xf0 >> (args_to_push / 4)) & 0xf,
21356 (0xf0 >> (args_to_push / 4)) & 0xf);
21357 else
21358 insn = emit_insn
21359 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21360 GEN_INT (- args_to_push)));
21361 RTX_FRAME_RELATED_P (insn) = 1;
21364 /* If this is an interrupt service routine, and the link register
21365 is going to be pushed, and we're not generating extra
21366 push of IP (needed when a frame pointer is needed and the APCS frame layout is used),
21367 subtracting four from LR now will mean that the function return
21368 can be done with a single instruction. */
21369 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21370 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21371 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21372 && TARGET_ARM)
21374 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21376 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21379 if (live_regs_mask)
21381 unsigned long dwarf_regs_mask = live_regs_mask;
21383 saved_regs += bit_count (live_regs_mask) * 4;
21384 if (optimize_size && !frame_pointer_needed
21385 && saved_regs == offsets->saved_regs - offsets->saved_args)
21387 /* If no coprocessor registers are being pushed and we don't have
21388 to worry about a frame pointer then push extra registers to
21389 create the stack frame. This is done in a way that does not
21390 alter the frame layout, so is independent of the epilogue. */
21391 int n;
21392 int frame;
21393 n = 0;
21394 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21395 n++;
21396 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21397 if (frame && n * 4 >= frame)
21399 n = frame / 4;
21400 live_regs_mask |= (1 << n) - 1;
21401 saved_regs += frame;
21405 if (TARGET_LDRD
21406 && current_tune->prefer_ldrd_strd
21407 && !optimize_function_for_size_p (cfun))
21409 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21410 if (TARGET_THUMB2)
21411 thumb2_emit_strd_push (live_regs_mask);
21412 else if (TARGET_ARM
21413 && !TARGET_APCS_FRAME
21414 && !IS_INTERRUPT (func_type))
21415 arm_emit_strd_push (live_regs_mask);
21416 else
21418 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21419 RTX_FRAME_RELATED_P (insn) = 1;
21422 else
21424 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21425 RTX_FRAME_RELATED_P (insn) = 1;
21429 if (! IS_VOLATILE (func_type))
21430 saved_regs += arm_save_coproc_regs ();
21432 if (frame_pointer_needed && TARGET_ARM)
21434 /* Create the new frame pointer. */
21435 if (TARGET_APCS_FRAME)
21437 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21438 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21439 RTX_FRAME_RELATED_P (insn) = 1;
21441 if (IS_NESTED (func_type))
21443 /* Recover the static chain register. */
21444 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21445 insn = gen_rtx_REG (SImode, 3);
21446 else
21448 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21449 insn = gen_frame_mem (SImode, insn);
21451 emit_set_insn (ip_rtx, insn);
21452 /* Add a USE to stop propagate_one_insn() from barfing. */
21453 emit_insn (gen_force_register_use (ip_rtx));
21456 else
21458 insn = GEN_INT (saved_regs - 4);
21459 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21460 stack_pointer_rtx, insn));
21461 RTX_FRAME_RELATED_P (insn) = 1;
21465 if (flag_stack_usage_info)
21466 current_function_static_stack_size
21467 = offsets->outgoing_args - offsets->saved_args;
21469 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21471 /* This add can produce multiple insns for a large constant, so we
21472 need to get tricky. */
21473 rtx_insn *last = get_last_insn ();
21475 amount = GEN_INT (offsets->saved_args + saved_regs
21476 - offsets->outgoing_args);
21478 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21479 amount));
21482 last = last ? NEXT_INSN (last) : get_insns ();
21483 RTX_FRAME_RELATED_P (last) = 1;
21485 while (last != insn);
21487 /* If the frame pointer is needed, emit a special barrier that
21488 will prevent the scheduler from moving stores to the frame
21489 before the stack adjustment. */
21490 if (frame_pointer_needed)
21491 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21492 hard_frame_pointer_rtx));
21496 if (frame_pointer_needed && TARGET_THUMB2)
21497 thumb_set_frame_pointer (offsets);
21499 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21501 unsigned long mask;
21503 mask = live_regs_mask;
21504 mask &= THUMB2_WORK_REGS;
21505 if (!IS_NESTED (func_type))
21506 mask |= (1 << IP_REGNUM);
21507 arm_load_pic_register (mask);
21510 /* If we are profiling, make sure no instructions are scheduled before
21511 the call to mcount. Similarly if the user has requested no
21512 scheduling in the prolog. Similarly if we want non-call exceptions
21513 using the EABI unwinder, to prevent faulting instructions from being
21514 swapped with a stack adjustment. */
21515 if (crtl->profile || !TARGET_SCHED_PROLOG
21516 || (arm_except_unwind_info (&global_options) == UI_TARGET
21517 && cfun->can_throw_non_call_exceptions))
21518 emit_insn (gen_blockage ());
21520 /* If the link register is being kept alive, with the return address in it,
21521 then make sure that it does not get reused by the ce2 pass. */
21522 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21523 cfun->machine->lr_save_eliminated = 1;
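/* For orientation, a classic APCS-frame prologue assembled from the pieces
   above looks roughly like this (illustrative register list and offsets):

       mov   ip, sp
       stmfd sp!, {fp, ip, lr, pc}
       sub   fp, ip, #4
       sub   sp, sp, #<locals>

   The real sequence varies with the function type, the saved register
   mask, varargs handling and the tuning options handled above.  */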
21526 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21527 static void
21528 arm_print_condition (FILE *stream)
21530 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21532 /* Branch conversion is not implemented for Thumb-2. */
21533 if (TARGET_THUMB)
21535 output_operand_lossage ("predicated Thumb instruction");
21536 return;
21538 if (current_insn_predicate != NULL)
21540 output_operand_lossage
21541 ("predicated instruction in conditional sequence");
21542 return;
21545 fputs (arm_condition_codes[arm_current_cc], stream);
21547 else if (current_insn_predicate)
21549 enum arm_cond_code code;
21551 if (TARGET_THUMB1)
21553 output_operand_lossage ("predicated Thumb instruction");
21554 return;
21557 code = get_arm_condition_code (current_insn_predicate);
21558 fputs (arm_condition_codes[code], stream);
21563 /* Globally reserved letters: acln
21564 Punctuation letters currently used: @_|?().!#
21565 Lower case letters currently used: bcdefhimpqtvwxyz
21566 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21567 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21569 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21571 If CODE is 'd', then X is a condition operand and the instruction
21572 should only be executed if the condition is true.
21573 If CODE is 'D', then X is a condition operand and the instruction
21574 should only be executed if the condition is false: however, if the mode
21575 of the comparison is CCFPEmode, then always execute the instruction -- we
21576 do this because in these circumstances !GE does not necessarily imply LT;
21577 in these cases the instruction pattern will take care to make sure that
21578 an instruction containing %d will follow, thereby undoing the effects of
21579 doing this instruction unconditionally.
21580 If CODE is 'N' then X is a floating point operand that must be negated
21581 before output.
21582 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21583 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
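/* As a small example of how these codes are used, an insn template such as
   "add%?\t%0, %1, %2" relies on the '?' punctuation code: normally it
   prints nothing, but inside a conditionalised sequence arm_print_condition
   above makes the same template come out as, say, "addeq r0, r1, r2".
   (Illustrative template and operands.)  */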
21584 static void
21585 arm_print_operand (FILE *stream, rtx x, int code)
21587 switch (code)
21589 case '@':
21590 fputs (ASM_COMMENT_START, stream);
21591 return;
21593 case '_':
21594 fputs (user_label_prefix, stream);
21595 return;
21597 case '|':
21598 fputs (REGISTER_PREFIX, stream);
21599 return;
21601 case '?':
21602 arm_print_condition (stream);
21603 return;
21605 case '(':
21606 /* Nothing in unified syntax, otherwise the current condition code. */
21607 if (!TARGET_UNIFIED_ASM)
21608 arm_print_condition (stream);
21609 break;
21611 case ')':
21612 /* The current condition code in unified syntax, otherwise nothing. */
21613 if (TARGET_UNIFIED_ASM)
21614 arm_print_condition (stream);
21615 break;
21617 case '.':
21618 /* The current condition code for a condition code setting instruction.
21619 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21620 if (TARGET_UNIFIED_ASM)
21622 fputc('s', stream);
21623 arm_print_condition (stream);
21625 else
21627 arm_print_condition (stream);
21628 fputc('s', stream);
21630 return;
21632 case '!':
21633 /* If the instruction is conditionally executed then print
21634 the current condition code, otherwise print 's'. */
21635 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21636 if (current_insn_predicate)
21637 arm_print_condition (stream);
21638 else
21639 fputc('s', stream);
21640 break;
21642 /* %# is a "break" sequence. It doesn't output anything, but is used to
21643 separate e.g. operand numbers from following text, if that text consists
21644 of further digits which we don't want to be part of the operand
21645 number. */
21646 case '#':
21647 return;
21649 case 'N':
21651 REAL_VALUE_TYPE r;
21652 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21653 r = real_value_negate (&r);
21654 fprintf (stream, "%s", fp_const_from_val (&r));
21656 return;
21658 /* An integer or symbol address without a preceding # sign. */
21659 case 'c':
21660 switch (GET_CODE (x))
21662 case CONST_INT:
21663 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21664 break;
21666 case SYMBOL_REF:
21667 output_addr_const (stream, x);
21668 break;
21670 case CONST:
21671 if (GET_CODE (XEXP (x, 0)) == PLUS
21672 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21674 output_addr_const (stream, x);
21675 break;
21677 /* Fall through. */
21679 default:
21680 output_operand_lossage ("Unsupported operand for code '%c'", code);
21682 return;
21684 /* An integer that we want to print in HEX. */
21685 case 'x':
21686 switch (GET_CODE (x))
21688 case CONST_INT:
21689 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21690 break;
21692 default:
21693 output_operand_lossage ("Unsupported operand for code '%c'", code);
21695 return;
21697 case 'B':
21698 if (CONST_INT_P (x))
21700 HOST_WIDE_INT val;
21701 val = ARM_SIGN_EXTEND (~INTVAL (x));
21702 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21704 else
21706 putc ('~', stream);
21707 output_addr_const (stream, x);
21709 return;
21711 case 'b':
21712 /* Print the log2 of a CONST_INT. */
21714 HOST_WIDE_INT val;
21716 if (!CONST_INT_P (x)
21717 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21718 output_operand_lossage ("Unsupported operand for code '%c'", code);
21719 else
21720 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21722 return;
21724 case 'L':
21725 /* The low 16 bits of an immediate constant. */
21726 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21727 return;
21729 case 'i':
21730 fprintf (stream, "%s", arithmetic_instr (x, 1));
21731 return;
21733 case 'I':
21734 fprintf (stream, "%s", arithmetic_instr (x, 0));
21735 return;
21737 case 'S':
21739 HOST_WIDE_INT val;
21740 const char *shift;
21742 shift = shift_op (x, &val);
21744 if (shift)
21746 fprintf (stream, ", %s ", shift);
21747 if (val == -1)
21748 arm_print_operand (stream, XEXP (x, 1), 0);
21749 else
21750 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21753 return;
21755 /* An explanation of the 'Q', 'R' and 'H' register operands:
21757 In a pair of registers containing a DI or DF value the 'Q'
21758 operand returns the register number of the register containing
21759 the least significant part of the value. The 'R' operand returns
21760 the register number of the register containing the most
21761 significant part of the value.
21763 The 'H' operand returns the higher of the two register numbers.
21764 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21765 same as the 'Q' operand, since the most significant part of the
21766 value is held in the lower number register. The reverse is true
21767 on systems where WORDS_BIG_ENDIAN is false.
21769 The purpose of these operands is to distinguish between cases
21770 where the endian-ness of the values is important (for example
21771 when they are added together), and cases where the endian-ness
21772 is irrelevant, but the order of register operations is important.
21773 For example when loading a value from memory into a register
21774 pair, the endian-ness does not matter. Provided that the value
21775 from the lower memory address is put into the lower numbered
21776 register, and the value from the higher address is put into the
21777 higher numbered register, the load will work regardless of whether
21778 the value being loaded is big-wordian or little-wordian. The
21779 order of the two register loads can matter however, if the address
21780 of the memory location is actually held in one of the registers
21781 being overwritten by the load.
21783 The 'Q' and 'R' constraints are also available for 64-bit
21784 constants. */
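 	/* Concretely (illustrative registers): for a DImode value held in
 	   r0/r1 on a little-wordian target, %Q prints r0, %R prints r1 and
 	   %H prints r1; on a big-wordian target %Q and %H both print the
 	   higher-numbered register, since that is where the least
 	   significant word lives.  */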
21785 case 'Q':
21786 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21788 rtx part = gen_lowpart (SImode, x);
21789 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21790 return;
21793 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21795 output_operand_lossage ("invalid operand for code '%c'", code);
21796 return;
21799 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21800 return;
21802 case 'R':
21803 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21805 machine_mode mode = GET_MODE (x);
21806 rtx part;
21808 if (mode == VOIDmode)
21809 mode = DImode;
21810 part = gen_highpart_mode (SImode, mode, x);
21811 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21812 return;
21815 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21817 output_operand_lossage ("invalid operand for code '%c'", code);
21818 return;
21821 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21822 return;
21824 case 'H':
21825 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21827 output_operand_lossage ("invalid operand for code '%c'", code);
21828 return;
21831 asm_fprintf (stream, "%r", REGNO (x) + 1);
21832 return;
21834 case 'J':
21835 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21837 output_operand_lossage ("invalid operand for code '%c'", code);
21838 return;
21841 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21842 return;
21844 case 'K':
21845 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21847 output_operand_lossage ("invalid operand for code '%c'", code);
21848 return;
21851 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21852 return;
21854 case 'm':
21855 asm_fprintf (stream, "%r",
21856 REG_P (XEXP (x, 0))
21857 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21858 return;
21860 case 'M':
21861 asm_fprintf (stream, "{%r-%r}",
21862 REGNO (x),
21863 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21864 return;
21866 /* Like 'M', but writing doubleword vector registers, for use by Neon
21867 insns. */
21868 case 'h':
21870 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21871 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21872 if (numregs == 1)
21873 asm_fprintf (stream, "{d%d}", regno);
21874 else
21875 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21877 return;
21879 case 'd':
21880 /* CONST_TRUE_RTX means always -- that's the default. */
21881 if (x == const_true_rtx)
21882 return;
21884 if (!COMPARISON_P (x))
21886 output_operand_lossage ("invalid operand for code '%c'", code);
21887 return;
21890 fputs (arm_condition_codes[get_arm_condition_code (x)],
21891 stream);
21892 return;
21894 case 'D':
21895 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21896 want to do that. */
21897 if (x == const_true_rtx)
21899 output_operand_lossage ("instruction never executed");
21900 return;
21902 if (!COMPARISON_P (x))
21904 output_operand_lossage ("invalid operand for code '%c'", code);
21905 return;
21908 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21909 (get_arm_condition_code (x))],
21910 stream);
21911 return;
21913 case 's':
21914 case 'V':
21915 case 'W':
21916 case 'X':
21917 case 'Y':
21918 case 'Z':
21919 /* Former Maverick support, removed after GCC-4.7. */
21920 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21921 return;
21923 case 'U':
21924 if (!REG_P (x)
21925 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21926 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21927 /* Bad value for wCG register number. */
21929 output_operand_lossage ("invalid operand for code '%c'", code);
21930 return;
21933 else
21934 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21935 return;
21937 /* Print an iWMMXt control register name. */
21938 case 'w':
21939 if (!CONST_INT_P (x)
21940 || INTVAL (x) < 0
21941 || INTVAL (x) >= 16)
21942 /* Bad value for wC register number. */
21944 output_operand_lossage ("invalid operand for code '%c'", code);
21945 return;
21948 else
21950 static const char * wc_reg_names [16] =
21952 "wCID", "wCon", "wCSSF", "wCASF",
21953 "wC4", "wC5", "wC6", "wC7",
21954 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21955 "wC12", "wC13", "wC14", "wC15"
21958 fputs (wc_reg_names [INTVAL (x)], stream);
21960 return;
21962 /* Print the high single-precision register of a VFP double-precision
21963 register. */
21964 case 'p':
21966 machine_mode mode = GET_MODE (x);
21967 int regno;
21969 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21971 output_operand_lossage ("invalid operand for code '%c'", code);
21972 return;
21975 regno = REGNO (x);
21976 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21978 output_operand_lossage ("invalid operand for code '%c'", code);
21979 return;
21982 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21984 return;
21986 /* Print a VFP/Neon double precision or quad precision register name. */
21987 case 'P':
21988 case 'q':
21990 machine_mode mode = GET_MODE (x);
21991 int is_quad = (code == 'q');
21992 int regno;
21994 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21996 output_operand_lossage ("invalid operand for code '%c'", code);
21997 return;
22000 if (!REG_P (x)
22001 || !IS_VFP_REGNUM (REGNO (x)))
22003 output_operand_lossage ("invalid operand for code '%c'", code);
22004 return;
22007 regno = REGNO (x);
22008 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22009 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22011 output_operand_lossage ("invalid operand for code '%c'", code);
22012 return;
22015 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22016 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22018 return;
22020 /* These two codes print the low/high doubleword register of a Neon quad
22021 register, respectively. For pair-structure types, can also print
22022 low/high quadword registers. */
22023 case 'e':
22024 case 'f':
22026 machine_mode mode = GET_MODE (x);
22027 int regno;
22029 if ((GET_MODE_SIZE (mode) != 16
22030 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22032 output_operand_lossage ("invalid operand for code '%c'", code);
22033 return;
22036 regno = REGNO (x);
22037 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22039 output_operand_lossage ("invalid operand for code '%c'", code);
22040 return;
22043 if (GET_MODE_SIZE (mode) == 16)
22044 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22045 + (code == 'f' ? 1 : 0));
22046 else
22047 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22048 + (code == 'f' ? 1 : 0));
22050 return;
22052 /* Print a VFPv3 floating-point constant, represented as an integer
22053 index. */
22054 case 'G':
22056 int index = vfp3_const_double_index (x);
22057 gcc_assert (index != -1);
22058 fprintf (stream, "%d", index);
22060 return;
22062 /* Print bits representing opcode features for Neon.
22064 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22065 and polynomials as unsigned.
22067 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22069 Bit 2 is 1 for rounding functions, 0 otherwise. */
22071 /* Identify the type as 's', 'u', 'p' or 'f'. */
22072 case 'T':
22074 HOST_WIDE_INT bits = INTVAL (x);
22075 fputc ("uspf"[bits & 3], stream);
22077 return;
22079 /* Likewise, but signed and unsigned integers are both 'i'. */
22080 case 'F':
22082 HOST_WIDE_INT bits = INTVAL (x);
22083 fputc ("iipf"[bits & 3], stream);
22085 return;
22087 /* As for 'T', but emit 'u' instead of 'p'. */
22088 case 't':
22090 HOST_WIDE_INT bits = INTVAL (x);
22091 fputc ("usuf"[bits & 3], stream);
22093 return;
22095 /* Bit 2: rounding (vs none). */
22096 case 'O':
22098 HOST_WIDE_INT bits = INTVAL (x);
22099 fputs ((bits & 4) != 0 ? "r" : "", stream);
22101 return;
22103 /* Memory operand for vld1/vst1 instruction. */
22104 case 'A':
22106 rtx addr;
22107 bool postinc = FALSE;
22108 rtx postinc_reg = NULL;
22109 unsigned align, memsize, align_bits;
22111 gcc_assert (MEM_P (x));
22112 addr = XEXP (x, 0);
22113 if (GET_CODE (addr) == POST_INC)
22115 postinc = 1;
22116 addr = XEXP (addr, 0);
22118 if (GET_CODE (addr) == POST_MODIFY)
22120 postinc_reg = XEXP( XEXP (addr, 1), 1);
22121 addr = XEXP (addr, 0);
22123 asm_fprintf (stream, "[%r", REGNO (addr));
22125 /* We know the alignment of this access, so we can emit a hint in the
22126 instruction (for some alignments) as an aid to the memory subsystem
22127 of the target. */
22128 align = MEM_ALIGN (x) >> 3;
22129 memsize = MEM_SIZE (x);
22131 /* Only certain alignment specifiers are supported by the hardware. */
22132 if (memsize == 32 && (align % 32) == 0)
22133 align_bits = 256;
22134 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22135 align_bits = 128;
22136 else if (memsize >= 8 && (align % 8) == 0)
22137 align_bits = 64;
22138 else
22139 align_bits = 0;
22141 if (align_bits != 0)
22142 asm_fprintf (stream, ":%d", align_bits);
22144 asm_fprintf (stream, "]");
22146 if (postinc)
22147 fputs("!", stream);
22148 if (postinc_reg)
22149 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22151 return;
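 	/* For example (illustrative operands), a 16-byte access through r0
 	   known to be 128-bit aligned is printed as "[r0:128]", so the full
 	   instruction might read "vld1.64 {d0-d1}, [r0:128]"; when no
 	   suitable alignment is known the ":" hint is simply left out.  */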
22153 case 'C':
22155 rtx addr;
22157 gcc_assert (MEM_P (x));
22158 addr = XEXP (x, 0);
22159 gcc_assert (REG_P (addr));
22160 asm_fprintf (stream, "[%r]", REGNO (addr));
22162 return;
22164 /* Translate an S register number into a D register number and element index. */
22165 case 'y':
22167 machine_mode mode = GET_MODE (x);
22168 int regno;
22170 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22172 output_operand_lossage ("invalid operand for code '%c'", code);
22173 return;
22176 regno = REGNO (x);
22177 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22179 output_operand_lossage ("invalid operand for code '%c'", code);
22180 return;
22183 regno = regno - FIRST_VFP_REGNUM;
22184 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22186 return;
22188 case 'v':
22189 gcc_assert (CONST_DOUBLE_P (x));
22190 int result;
22191 result = vfp3_const_double_for_fract_bits (x);
22192 if (result == 0)
22193 result = vfp3_const_double_for_bits (x);
22194 fprintf (stream, "#%d", result);
22195 return;
22197 /* Register specifier for vld1.16/vst1.16. Translate the S register
22198 number into a D register number and element index. */
22199 case 'z':
22201 machine_mode mode = GET_MODE (x);
22202 int regno;
22204 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22206 output_operand_lossage ("invalid operand for code '%c'", code);
22207 return;
22210 regno = REGNO (x);
22211 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22213 output_operand_lossage ("invalid operand for code '%c'", code);
22214 return;
22217 regno = regno - FIRST_VFP_REGNUM;
22218 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22220 return;
22222 default:
22223 if (x == 0)
22225 output_operand_lossage ("missing operand");
22226 return;
22229 switch (GET_CODE (x))
22231 case REG:
22232 asm_fprintf (stream, "%r", REGNO (x));
22233 break;
22235 case MEM:
22236 output_memory_reference_mode = GET_MODE (x);
22237 output_address (XEXP (x, 0));
22238 break;
22240 case CONST_DOUBLE:
22242 char fpstr[20];
22243 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22244 sizeof (fpstr), 0, 1);
22245 fprintf (stream, "#%s", fpstr);
22247 break;
22249 default:
22250 gcc_assert (GET_CODE (x) != NEG);
22251 fputc ('#', stream);
22252 if (GET_CODE (x) == HIGH)
22254 fputs (":lower16:", stream);
22255 x = XEXP (x, 0);
22258 output_addr_const (stream, x);
22259 break;
22264 /* Target hook for printing a memory address. */
22265 static void
22266 arm_print_operand_address (FILE *stream, rtx x)
22268 if (TARGET_32BIT)
22270 int is_minus = GET_CODE (x) == MINUS;
22272 if (REG_P (x))
22273 asm_fprintf (stream, "[%r]", REGNO (x));
22274 else if (GET_CODE (x) == PLUS || is_minus)
22276 rtx base = XEXP (x, 0);
22277 rtx index = XEXP (x, 1);
22278 HOST_WIDE_INT offset = 0;
22279 if (!REG_P (base)
22280 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22282 /* Ensure that BASE is a register. */
22283 /* (one of them must be). */
22284 /* Also ensure the SP is not used as an index register. */
22285 std::swap (base, index);
22287 switch (GET_CODE (index))
22289 case CONST_INT:
22290 offset = INTVAL (index);
22291 if (is_minus)
22292 offset = -offset;
22293 asm_fprintf (stream, "[%r, #%wd]",
22294 REGNO (base), offset);
22295 break;
22297 case REG:
22298 asm_fprintf (stream, "[%r, %s%r]",
22299 REGNO (base), is_minus ? "-" : "",
22300 REGNO (index));
22301 break;
22303 case MULT:
22304 case ASHIFTRT:
22305 case LSHIFTRT:
22306 case ASHIFT:
22307 case ROTATERT:
22309 asm_fprintf (stream, "[%r, %s%r",
22310 REGNO (base), is_minus ? "-" : "",
22311 REGNO (XEXP (index, 0)));
22312 arm_print_operand (stream, index, 'S');
22313 fputs ("]", stream);
22314 break;
22317 default:
22318 gcc_unreachable ();
22321 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22322 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22324 extern machine_mode output_memory_reference_mode;
22326 gcc_assert (REG_P (XEXP (x, 0)));
22328 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22329 asm_fprintf (stream, "[%r, #%s%d]!",
22330 REGNO (XEXP (x, 0)),
22331 GET_CODE (x) == PRE_DEC ? "-" : "",
22332 GET_MODE_SIZE (output_memory_reference_mode));
22333 else
22334 asm_fprintf (stream, "[%r], #%s%d",
22335 REGNO (XEXP (x, 0)),
22336 GET_CODE (x) == POST_DEC ? "-" : "",
22337 GET_MODE_SIZE (output_memory_reference_mode));
22339 else if (GET_CODE (x) == PRE_MODIFY)
22341 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22342 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22343 asm_fprintf (stream, "#%wd]!",
22344 INTVAL (XEXP (XEXP (x, 1), 1)));
22345 else
22346 asm_fprintf (stream, "%r]!",
22347 REGNO (XEXP (XEXP (x, 1), 1)));
22349 else if (GET_CODE (x) == POST_MODIFY)
22351 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22352 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22353 asm_fprintf (stream, "#%wd",
22354 INTVAL (XEXP (XEXP (x, 1), 1)));
22355 else
22356 asm_fprintf (stream, "%r",
22357 REGNO (XEXP (XEXP (x, 1), 1)));
22359 else output_addr_const (stream, x);
22361 else
22363 if (REG_P (x))
22364 asm_fprintf (stream, "[%r]", REGNO (x));
22365 else if (GET_CODE (x) == POST_INC)
22366 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22367 else if (GET_CODE (x) == PLUS)
22369 gcc_assert (REG_P (XEXP (x, 0)));
22370 if (CONST_INT_P (XEXP (x, 1)))
22371 asm_fprintf (stream, "[%r, #%wd]",
22372 REGNO (XEXP (x, 0)),
22373 INTVAL (XEXP (x, 1)));
22374 else
22375 asm_fprintf (stream, "[%r, %r]",
22376 REGNO (XEXP (x, 0)),
22377 REGNO (XEXP (x, 1)));
22379 else
22380 output_addr_const (stream, x);
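/* A few sample expansions of the 32-bit cases above (illustrative
   registers): (plus r0 (const_int 4)) prints as "[r0, #4]", a PRE_MODIFY
   of r0 by 8 prints as "[r0, #8]!", and a POST_INC of an SImode access
   prints as "[r0], #4" using the size recorded in
   output_memory_reference_mode.  */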
22384 /* Target hook for indicating whether a punctuation character for
22385 TARGET_PRINT_OPERAND is valid. */
22386 static bool
22387 arm_print_operand_punct_valid_p (unsigned char code)
22389 return (code == '@' || code == '|' || code == '.'
22390 || code == '(' || code == ')' || code == '#'
22391 || (TARGET_32BIT && (code == '?'))
22392 || (TARGET_THUMB2 && (code == '!'))
22393 || (TARGET_THUMB && (code == '_')));
22396 /* Target hook for assembling integer objects. The ARM version needs to
22397 handle word-sized values specially. */
22398 static bool
22399 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22401 machine_mode mode;
22403 if (size == UNITS_PER_WORD && aligned_p)
22405 fputs ("\t.word\t", asm_out_file);
22406 output_addr_const (asm_out_file, x);
22408 /* Mark symbols as position independent. We only do this in the
22409 .text segment, not in the .data segment. */
22410 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22411 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22413 /* See legitimize_pic_address for an explanation of the
22414 TARGET_VXWORKS_RTP check. */
22415 if (!arm_pic_data_is_text_relative
22416 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22417 fputs ("(GOT)", asm_out_file);
22418 else
22419 fputs ("(GOTOFF)", asm_out_file);
22421 fputc ('\n', asm_out_file);
22422 return true;
22425 mode = GET_MODE (x);
22427 if (arm_vector_mode_supported_p (mode))
22429 int i, units;
22431 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22433 units = CONST_VECTOR_NUNITS (x);
22434 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22436 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22437 for (i = 0; i < units; i++)
22439 rtx elt = CONST_VECTOR_ELT (x, i);
22440 assemble_integer
22441 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22443 else
22444 for (i = 0; i < units; i++)
22446 rtx elt = CONST_VECTOR_ELT (x, i);
22447 REAL_VALUE_TYPE rval;
22449 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22451 assemble_real
22452 (rval, GET_MODE_INNER (mode),
22453 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22456 return true;
22459 return default_assemble_integer (x, size, aligned_p);
22462 static void
22463 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22465 section *s;
22467 if (!TARGET_AAPCS_BASED)
22469 (is_ctor ?
22470 default_named_section_asm_out_constructor
22471 : default_named_section_asm_out_destructor) (symbol, priority);
22472 return;
22475 /* Put these in the .init_array section, using a special relocation. */
22476 if (priority != DEFAULT_INIT_PRIORITY)
22478 char buf[18];
22479 sprintf (buf, "%s.%.5u",
22480 is_ctor ? ".init_array" : ".fini_array",
22481 priority);
22482 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22484 else if (is_ctor)
22485 s = ctors_section;
22486 else
22487 s = dtors_section;
22489 switch_to_section (s);
22490 assemble_align (POINTER_SIZE);
22491 fputs ("\t.word\t", asm_out_file);
22492 output_addr_const (asm_out_file, symbol);
22493 fputs ("(target1)\n", asm_out_file);
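/* So, on an AAPCS target, a constructor with priority 101 lands in a
   section named ".init_array.00101" and is emitted as
   "\t.word\tfoo(target1)" (illustrative symbol "foo").  */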
22496 /* Add a function to the list of static constructors. */
22498 static void
22499 arm_elf_asm_constructor (rtx symbol, int priority)
22501 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22504 /* Add a function to the list of static destructors. */
22506 static void
22507 arm_elf_asm_destructor (rtx symbol, int priority)
22509 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22512 /* A finite state machine takes care of noticing whether or not instructions
22513 can be conditionally executed, and thus decrease execution time and code
22514 size by deleting branch instructions. The fsm is controlled by
22515 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22517 /* The states of the fsm controlling condition codes are:
22518 0: normal, do nothing special
22519 1: make ASM_OUTPUT_OPCODE not output this instruction
22520 2: make ASM_OUTPUT_OPCODE not output this instruction
22521 3: make instructions conditional
22522 4: make instructions conditional
22524 State transitions (state->state by whom under condition):
22525 0 -> 1 final_prescan_insn if the `target' is a label
22526 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22527 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22528 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22529 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22530 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22531 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22532 (the target insn is arm_target_insn).
22534 If the jump clobbers the conditions then we use states 2 and 4.
22536 A similar thing can be done with conditional return insns.
22538 XXX In case the `target' is an unconditional branch, this conditionalising
22539 of the instructions always reduces code size, but not always execution
22540 time. But then, I want to reduce the code size to somewhere near what
22541 /bin/cc produces. */
22543 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22544 instructions. When a COND_EXEC instruction is seen the subsequent
22545 instructions are scanned so that multiple conditional instructions can be
22546 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22547 specify the length and true/false mask for the IT block. These will be
22548 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
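/* As an illustrative example of the payoff, a fragment such as

       cmp	r0, #0
       beq	.L1
       mov	r1, #5
   .L1:

   can have the skipped instruction made conditional instead:

       cmp	r0, #0
       movne	r1, #5

   removing the branch entirely (for Thumb-2 the same effect is achieved
   with an IT block).  */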
22550 /* Returns the index of the ARM condition code string in
22551 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22552 COMPARISON should be an rtx like `(eq (...) (...))'. */
22554 enum arm_cond_code
22555 maybe_get_arm_condition_code (rtx comparison)
22557 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22558 enum arm_cond_code code;
22559 enum rtx_code comp_code = GET_CODE (comparison);
22561 if (GET_MODE_CLASS (mode) != MODE_CC)
22562 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22563 XEXP (comparison, 1));
22565 switch (mode)
22567 case CC_DNEmode: code = ARM_NE; goto dominance;
22568 case CC_DEQmode: code = ARM_EQ; goto dominance;
22569 case CC_DGEmode: code = ARM_GE; goto dominance;
22570 case CC_DGTmode: code = ARM_GT; goto dominance;
22571 case CC_DLEmode: code = ARM_LE; goto dominance;
22572 case CC_DLTmode: code = ARM_LT; goto dominance;
22573 case CC_DGEUmode: code = ARM_CS; goto dominance;
22574 case CC_DGTUmode: code = ARM_HI; goto dominance;
22575 case CC_DLEUmode: code = ARM_LS; goto dominance;
22576 case CC_DLTUmode: code = ARM_CC;
22578 dominance:
22579 if (comp_code == EQ)
22580 return ARM_INVERSE_CONDITION_CODE (code);
22581 if (comp_code == NE)
22582 return code;
22583 return ARM_NV;
22585 case CC_NOOVmode:
22586 switch (comp_code)
22588 case NE: return ARM_NE;
22589 case EQ: return ARM_EQ;
22590 case GE: return ARM_PL;
22591 case LT: return ARM_MI;
22592 default: return ARM_NV;
22595 case CC_Zmode:
22596 switch (comp_code)
22598 case NE: return ARM_NE;
22599 case EQ: return ARM_EQ;
22600 default: return ARM_NV;
22603 case CC_Nmode:
22604 switch (comp_code)
22606 case NE: return ARM_MI;
22607 case EQ: return ARM_PL;
22608 default: return ARM_NV;
22611 case CCFPEmode:
22612 case CCFPmode:
22613 /* We can handle all cases except UNEQ and LTGT. */
22614 switch (comp_code)
22616 case GE: return ARM_GE;
22617 case GT: return ARM_GT;
22618 case LE: return ARM_LS;
22619 case LT: return ARM_MI;
22620 case NE: return ARM_NE;
22621 case EQ: return ARM_EQ;
22622 case ORDERED: return ARM_VC;
22623 case UNORDERED: return ARM_VS;
22624 case UNLT: return ARM_LT;
22625 case UNLE: return ARM_LE;
22626 case UNGT: return ARM_HI;
22627 case UNGE: return ARM_PL;
22628 /* UNEQ and LTGT do not have a representation. */
22629 case UNEQ: /* Fall through. */
22630 case LTGT: /* Fall through. */
22631 default: return ARM_NV;
22634 case CC_SWPmode:
22635 switch (comp_code)
22637 case NE: return ARM_NE;
22638 case EQ: return ARM_EQ;
22639 case GE: return ARM_LE;
22640 case GT: return ARM_LT;
22641 case LE: return ARM_GE;
22642 case LT: return ARM_GT;
22643 case GEU: return ARM_LS;
22644 case GTU: return ARM_CC;
22645 case LEU: return ARM_CS;
22646 case LTU: return ARM_HI;
22647 default: return ARM_NV;
22650 case CC_Cmode:
22651 switch (comp_code)
22653 case LTU: return ARM_CS;
22654 case GEU: return ARM_CC;
22655 default: return ARM_NV;
22658 case CC_CZmode:
22659 switch (comp_code)
22661 case NE: return ARM_NE;
22662 case EQ: return ARM_EQ;
22663 case GEU: return ARM_CS;
22664 case GTU: return ARM_HI;
22665 case LEU: return ARM_LS;
22666 case LTU: return ARM_CC;
22667 default: return ARM_NV;
22670 case CC_NCVmode:
22671 switch (comp_code)
22673 case GE: return ARM_GE;
22674 case LT: return ARM_LT;
22675 case GEU: return ARM_CS;
22676 case LTU: return ARM_CC;
22677 default: return ARM_NV;
22680 case CCmode:
22681 switch (comp_code)
22683 case NE: return ARM_NE;
22684 case EQ: return ARM_EQ;
22685 case GE: return ARM_GE;
22686 case GT: return ARM_GT;
22687 case LE: return ARM_LE;
22688 case LT: return ARM_LT;
22689 case GEU: return ARM_CS;
22690 case GTU: return ARM_HI;
22691 case LEU: return ARM_LS;
22692 case LTU: return ARM_CC;
22693 default: return ARM_NV;
22696 default: gcc_unreachable ();
22700 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22701 static enum arm_cond_code
22702 get_arm_condition_code (rtx comparison)
22704 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22705 gcc_assert (code != ARM_NV);
22706 return code;
22709 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22710 instructions. */
22711 void
22712 thumb2_final_prescan_insn (rtx_insn *insn)
22714 rtx_insn *first_insn = insn;
22715 rtx body = PATTERN (insn);
22716 rtx predicate;
22717 enum arm_cond_code code;
22718 int n;
22719 int mask;
22720 int max;
22722 /* max_insns_skipped in the tune was already taken into account in the
22723 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22724 just emit the IT blocks as best we can. It does not make sense to split
22725 the IT blocks. */
22726 max = MAX_INSN_PER_IT_BLOCK;
22728 /* Remove the previous insn from the count of insns to be output. */
22729 if (arm_condexec_count)
22730 arm_condexec_count--;
22732 /* Nothing to do if we are already inside a conditional block. */
22733 if (arm_condexec_count)
22734 return;
22736 if (GET_CODE (body) != COND_EXEC)
22737 return;
22739 /* Conditional jumps are implemented directly. */
22740 if (JUMP_P (insn))
22741 return;
22743 predicate = COND_EXEC_TEST (body);
22744 arm_current_cc = get_arm_condition_code (predicate);
22746 n = get_attr_ce_count (insn);
22747 arm_condexec_count = 1;
22748 arm_condexec_mask = (1 << n) - 1;
22749 arm_condexec_masklen = n;
22750 /* See if subsequent instructions can be combined into the same block. */
22751 for (;;)
22753 insn = next_nonnote_insn (insn);
22755 /* Jumping into the middle of an IT block is illegal, so a label or
22756 barrier terminates the block. */
22757 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22758 break;
22760 body = PATTERN (insn);
22761 /* USE and CLOBBER aren't really insns, so just skip them. */
22762 if (GET_CODE (body) == USE
22763 || GET_CODE (body) == CLOBBER)
22764 continue;
22766 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22767 if (GET_CODE (body) != COND_EXEC)
22768 break;
22769 /* Maximum number of conditionally executed instructions in a block. */
22770 n = get_attr_ce_count (insn);
22771 if (arm_condexec_masklen + n > max)
22772 break;
22774 predicate = COND_EXEC_TEST (body);
22775 code = get_arm_condition_code (predicate);
22776 mask = (1 << n) - 1;
22777 if (arm_current_cc == code)
22778 arm_condexec_mask |= (mask << arm_condexec_masklen);
22779 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22780 break;
22782 arm_condexec_count++;
22783 arm_condexec_masklen += n;
22785 /* A jump must be the last instruction in a conditional block. */
22786 if (JUMP_P (insn))
22787 break;
22789 /* Restore recog_data (getting the attributes of other insns can
22790 destroy this array, but final.c assumes that it remains intact
22791 across this call). */
22792 extract_constrain_insn_cached (first_insn);
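/* For instance (illustrative), three consecutive COND_EXEC insns with
   conditions EQ, EQ, NE are gathered into one block here with
   arm_condexec_masklen == 3 and arm_condexec_mask == binary 011, which
   thumb2_asm_output_opcode below then prints as an "itte eq" prefix.  */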
22795 void
22796 arm_final_prescan_insn (rtx_insn *insn)
22798 /* BODY will hold the body of INSN. */
22799 rtx body = PATTERN (insn);
22801 /* This will be 1 if trying to repeat the trick, and things need to be
22802 reversed if it appears to fail. */
22803 int reverse = 0;
22805 /* If we start with a return insn, we only succeed if we find another one. */
22806 int seeking_return = 0;
22807 enum rtx_code return_code = UNKNOWN;
22809 /* START_INSN will hold the insn from where we start looking. This is the
22810 first insn after the following code_label if REVERSE is true. */
22811 rtx_insn *start_insn = insn;
22813 /* If in state 4, check if the target branch is reached, in order to
22814 change back to state 0. */
22815 if (arm_ccfsm_state == 4)
22817 if (insn == arm_target_insn)
22819 arm_target_insn = NULL;
22820 arm_ccfsm_state = 0;
22822 return;
22825 /* If in state 3, it is possible to repeat the trick, if this insn is an
22826 unconditional branch to a label, and immediately following this branch
22827 is the previous target label which is only used once, and the label this
22828 branch jumps to is not too far off. */
22829 if (arm_ccfsm_state == 3)
22831 if (simplejump_p (insn))
22833 start_insn = next_nonnote_insn (start_insn);
22834 if (BARRIER_P (start_insn))
22836 /* XXX Isn't this always a barrier? */
22837 start_insn = next_nonnote_insn (start_insn);
22839 if (LABEL_P (start_insn)
22840 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22841 && LABEL_NUSES (start_insn) == 1)
22842 reverse = TRUE;
22843 else
22844 return;
22846 else if (ANY_RETURN_P (body))
22848 start_insn = next_nonnote_insn (start_insn);
22849 if (BARRIER_P (start_insn))
22850 start_insn = next_nonnote_insn (start_insn);
22851 if (LABEL_P (start_insn)
22852 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22853 && LABEL_NUSES (start_insn) == 1)
22855 reverse = TRUE;
22856 seeking_return = 1;
22857 return_code = GET_CODE (body);
22859 else
22860 return;
22862 else
22863 return;
22866 gcc_assert (!arm_ccfsm_state || reverse);
22867 if (!JUMP_P (insn))
22868 return;
22870 /* This jump might be paralleled with a clobber of the condition codes;
22871 the jump should always come first. */
22872 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22873 body = XVECEXP (body, 0, 0);
22875 if (reverse
22876 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22877 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22879 int insns_skipped;
22880 int fail = FALSE, succeed = FALSE;
22881 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22882 int then_not_else = TRUE;
22883 rtx_insn *this_insn = start_insn;
22884 rtx label = 0;
22886 /* Register the insn jumped to. */
22887 if (reverse)
22889 if (!seeking_return)
22890 label = XEXP (SET_SRC (body), 0);
22892 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22893 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22894 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22896 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22897 then_not_else = FALSE;
22899 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22901 seeking_return = 1;
22902 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22904 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22906 seeking_return = 1;
22907 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22908 then_not_else = FALSE;
22910 else
22911 gcc_unreachable ();
22913 /* See how many insns this branch skips, and what kind of insns. If all
22914 insns are okay, and the label or unconditional branch to the same
22915 label is not too far away, succeed. */
22916 for (insns_skipped = 0;
22917 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22919 rtx scanbody;
22921 this_insn = next_nonnote_insn (this_insn);
22922 if (!this_insn)
22923 break;
22925 switch (GET_CODE (this_insn))
22927 case CODE_LABEL:
22928 /* Succeed if it is the target label, otherwise fail since
22929 control falls in from somewhere else. */
22930 if (this_insn == label)
22932 arm_ccfsm_state = 1;
22933 succeed = TRUE;
22935 else
22936 fail = TRUE;
22937 break;
22939 case BARRIER:
22940 /* Succeed if the following insn is the target label.
22941 Otherwise fail.
22942 If return insns are used then the last insn in a function
22943 will be a barrier. */
22944 this_insn = next_nonnote_insn (this_insn);
22945 if (this_insn && this_insn == label)
22947 arm_ccfsm_state = 1;
22948 succeed = TRUE;
22950 else
22951 fail = TRUE;
22952 break;
22954 case CALL_INSN:
22955 /* The AAPCS says that conditional calls should not be
22956 used since they make interworking inefficient (the
22957 linker can't transform BL<cond> into BLX). That's
22958 only a problem if the machine has BLX. */
22959 if (arm_arch5)
22961 fail = TRUE;
22962 break;
22965 /* Succeed if the following insn is the target label, or
22966 if the following two insns are a barrier and the
22967 target label. */
22968 this_insn = next_nonnote_insn (this_insn);
22969 if (this_insn && BARRIER_P (this_insn))
22970 this_insn = next_nonnote_insn (this_insn);
22972 if (this_insn && this_insn == label
22973 && insns_skipped < max_insns_skipped)
22975 arm_ccfsm_state = 1;
22976 succeed = TRUE;
22978 else
22979 fail = TRUE;
22980 break;
22982 case JUMP_INSN:
22983 /* If this is an unconditional branch to the same label, succeed.
22984 If it is to another label, do nothing. If it is conditional,
22985 fail. */
22986 /* XXX Probably, the tests for SET and the PC are
22987 unnecessary. */
22989 scanbody = PATTERN (this_insn);
22990 if (GET_CODE (scanbody) == SET
22991 && GET_CODE (SET_DEST (scanbody)) == PC)
22993 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22994 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22996 arm_ccfsm_state = 2;
22997 succeed = TRUE;
22999 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23000 fail = TRUE;
23002 /* Fail if a conditional return is undesirable (e.g. on a
23003 StrongARM), but still allow this if optimizing for size. */
23004 else if (GET_CODE (scanbody) == return_code
23005 && !use_return_insn (TRUE, NULL)
23006 && !optimize_size)
23007 fail = TRUE;
23008 else if (GET_CODE (scanbody) == return_code)
23010 arm_ccfsm_state = 2;
23011 succeed = TRUE;
23013 else if (GET_CODE (scanbody) == PARALLEL)
23015 switch (get_attr_conds (this_insn))
23017 case CONDS_NOCOND:
23018 break;
23019 default:
23020 fail = TRUE;
23021 break;
23024 else
23025 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23027 break;
23029 case INSN:
23030 /* Instructions using or affecting the condition codes make it
23031 fail. */
23032 scanbody = PATTERN (this_insn);
23033 if (!(GET_CODE (scanbody) == SET
23034 || GET_CODE (scanbody) == PARALLEL)
23035 || get_attr_conds (this_insn) != CONDS_NOCOND)
23036 fail = TRUE;
23037 break;
23039 default:
23040 break;
23043 if (succeed)
23045 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23046 arm_target_label = CODE_LABEL_NUMBER (label);
23047 else
23049 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23051 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23053 this_insn = next_nonnote_insn (this_insn);
23054 gcc_assert (!this_insn
23055 || (!BARRIER_P (this_insn)
23056 && !LABEL_P (this_insn)));
23058 if (!this_insn)
23060 /* Oh dear!  We ran off the end; give up. */
23061 extract_constrain_insn_cached (insn);
23062 arm_ccfsm_state = 0;
23063 arm_target_insn = NULL;
23064 return;
23066 arm_target_insn = this_insn;
23069 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23070 what it was. */
23071 if (!reverse)
23072 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23074 if (reverse || then_not_else)
23075 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23078 /* Restore recog_data (getting the attributes of other insns can
23079 destroy this array, but final.c assumes that it remains intact
23080 across this call). */
23081 extract_constrain_insn_cached (insn);
23085 /* Output IT instructions. */
23086 void
23087 thumb2_asm_output_opcode (FILE * stream)
23089 char buff[5];
23090 int n;
23092 if (arm_condexec_mask)
23094 for (n = 0; n < arm_condexec_masklen; n++)
23095 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23096 buff[n] = 0;
23097 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23098 arm_condition_codes[arm_current_cc]);
23099 arm_condexec_mask = 0;
23103 /* Returns true if REGNO is a valid register
23104 for holding a quantity of type MODE. */
23106 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23108 if (GET_MODE_CLASS (mode) == MODE_CC)
23109 return (regno == CC_REGNUM
23110 || (TARGET_HARD_FLOAT && TARGET_VFP
23111 && regno == VFPCC_REGNUM));
23113 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23114 return false;
23116 if (TARGET_THUMB1)
23117 /* For the Thumb we only allow values bigger than SImode in
23118 registers 0 - 6, so that there is always a second low
23119 register available to hold the upper part of the value.
23120 We probably ought to ensure that the register is the
23121 start of an even numbered register pair. */
23122 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23124 if (TARGET_HARD_FLOAT && TARGET_VFP
23125 && IS_VFP_REGNUM (regno))
23127 if (mode == SFmode || mode == SImode)
23128 return VFP_REGNO_OK_FOR_SINGLE (regno);
23130 if (mode == DFmode)
23131 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23133 /* VFP registers can hold HFmode values, but there is no point in
23134 putting them there unless we have hardware conversion insns. */
23135 if (mode == HFmode)
23136 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23138 if (TARGET_NEON)
23139 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23140 || (VALID_NEON_QREG_MODE (mode)
23141 && NEON_REGNO_OK_FOR_QUAD (regno))
23142 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23143 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23144 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23145 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23146 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23148 return FALSE;
23151 if (TARGET_REALLY_IWMMXT)
23153 if (IS_IWMMXT_GR_REGNUM (regno))
23154 return mode == SImode;
23156 if (IS_IWMMXT_REGNUM (regno))
23157 return VALID_IWMMXT_REG_MODE (mode);
23160 /* We allow almost any value to be stored in the general registers.
23161 Restrict doubleword quantities to even register pairs in ARM state
23162 so that we can use ldrd. Do not allow very large Neon structure
23163 opaque modes in general registers; they would use too many. */
23164 if (regno <= LAST_ARM_REGNUM)
23166 if (ARM_NUM_REGS (mode) > 4)
23167 return FALSE;
23169 if (TARGET_THUMB2)
23170 return TRUE;
23172 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23175 if (regno == FRAME_POINTER_REGNUM
23176 || regno == ARG_POINTER_REGNUM)
23177 /* We only allow integers in the fake hard registers. */
23178 return GET_MODE_CLASS (mode) == MODE_INT;
23180 return FALSE;
23183 /* Implement MODES_TIEABLE_P. */
23185 bool
23186 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23188 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23189 return true;
23191 /* We specifically want to allow elements of "structure" modes to
23192 be tieable to the structure. This more general condition allows
23193 other rarer situations too. */
23194 if (TARGET_NEON
23195 && (VALID_NEON_DREG_MODE (mode1)
23196 || VALID_NEON_QREG_MODE (mode1)
23197 || VALID_NEON_STRUCT_MODE (mode1))
23198 && (VALID_NEON_DREG_MODE (mode2)
23199 || VALID_NEON_QREG_MODE (mode2)
23200 || VALID_NEON_STRUCT_MODE (mode2)))
23201 return true;
23203 return false;
23206 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23207 not used in arm mode. */
23209 enum reg_class
23210 arm_regno_class (int regno)
23212 if (regno == PC_REGNUM)
23213 return NO_REGS;
23215 if (TARGET_THUMB1)
23217 if (regno == STACK_POINTER_REGNUM)
23218 return STACK_REG;
23219 if (regno == CC_REGNUM)
23220 return CC_REG;
23221 if (regno < 8)
23222 return LO_REGS;
23223 return HI_REGS;
23226 if (TARGET_THUMB2 && regno < 8)
23227 return LO_REGS;
23229 if ( regno <= LAST_ARM_REGNUM
23230 || regno == FRAME_POINTER_REGNUM
23231 || regno == ARG_POINTER_REGNUM)
23232 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23234 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23235 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23237 if (IS_VFP_REGNUM (regno))
23239 if (regno <= D7_VFP_REGNUM)
23240 return VFP_D0_D7_REGS;
23241 else if (regno <= LAST_LO_VFP_REGNUM)
23242 return VFP_LO_REGS;
23243 else
23244 return VFP_HI_REGS;
23247 if (IS_IWMMXT_REGNUM (regno))
23248 return IWMMXT_REGS;
23250 if (IS_IWMMXT_GR_REGNUM (regno))
23251 return IWMMXT_GR_REGS;
23253 return NO_REGS;
23256 /* Handle a special case when computing the offset
23257 of an argument from the frame pointer. */
23259 arm_debugger_arg_offset (int value, rtx addr)
23261 rtx_insn *insn;
23263 /* We are only interested if dbxout_parms() failed to compute the offset. */
23264 if (value != 0)
23265 return 0;
23267 /* We can only cope with the case where the address is held in a register. */
23268 if (!REG_P (addr))
23269 return 0;
23271 /* If we are using the frame pointer to point at the argument, then
23272 an offset of 0 is correct. */
23273 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23274 return 0;
23276 /* If we are using the stack pointer to point at the
23277 argument, then an offset of 0 is correct. */
23278 /* ??? Check this is consistent with thumb2 frame layout. */
23279 if ((TARGET_THUMB || !frame_pointer_needed)
23280 && REGNO (addr) == SP_REGNUM)
23281 return 0;
23283 /* Oh dear. The argument is pointed to by a register rather
23284 than being held in a register, or being stored at a known
23285 offset from the frame pointer. Since GDB only understands
23286 those two kinds of argument we must translate the address
23287 held in the register into an offset from the frame pointer.
23288 We do this by searching through the insns for the function
23289 looking to see where this register gets its value. If the
23290 register is initialized from the frame pointer plus an offset
23291 then we are in luck and we can continue, otherwise we give up.
23293 This code is exercised by producing debugging information
23294 for a function with arguments like this:
23296 double func (double a, double b, int c, double d) {return d;}
23298 Without this code the stab for parameter 'd' will be set to
23299 an offset of 0 from the frame pointer, rather than 8. */
23301 /* The if() statement says:
23303 If the insn is a normal instruction
23304 and if the insn is setting the value in a register
23305 and if the register being set is the register holding the address of the argument
23306 and if the address is computed by an addition
23307 that involves adding to a register
23308 which is the frame pointer
23309 a constant integer
23311 then... */
23313 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23315 if ( NONJUMP_INSN_P (insn)
23316 && GET_CODE (PATTERN (insn)) == SET
23317 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23318 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23319 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23320 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23321 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23324 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23326 break;
23330 if (value == 0)
23332 debug_rtx (addr);
23333 warning (0, "unable to compute real location of stacked parameter");
23334 value = 8; /* XXX magic hack */
23337 return value;
23340 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23342 static const char *
23343 arm_invalid_parameter_type (const_tree t)
23345 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23346 return N_("function parameters cannot have __fp16 type");
23347 return NULL;
23350 /* Implement TARGET_INVALID_RETURN_TYPE. */
23352 static const char *
23353 arm_invalid_return_type (const_tree t)
23355 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23356 return N_("functions cannot return __fp16 type");
23357 return NULL;
23360 /* Implement TARGET_PROMOTED_TYPE. */
23362 static tree
23363 arm_promoted_type (const_tree t)
23365 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23366 return float_type_node;
23367 return NULL_TREE;
23370 /* Implement TARGET_CONVERT_TO_TYPE.
23371 Specifically, this hook implements the peculiarity of the ARM
23372 half-precision floating-point C semantics that requires conversions between
23373 __fp16 and double to go through an intermediate conversion to float. */
23375 static tree
23376 arm_convert_to_type (tree type, tree expr)
23378 tree fromtype = TREE_TYPE (expr);
23379 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23380 return NULL_TREE;
23381 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23382 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23383 return convert (type, convert (float_type_node, expr));
23384 return NULL_TREE;
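/* In effect (sketch): given "__fp16 h; double d;", the assignment "d = h"
   is converted as (double)(float)h and "h = d" as (__fp16)(float)d, i.e.
   both directions pass through single precision as described above.  */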
23387 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23388 This simply adds HFmode as a supported mode; even though we don't
23389 implement arithmetic on this type directly, it's supported by
23390 optabs conversions, much the way the double-word arithmetic is
23391 special-cased in the default hook. */
23393 static bool
23394 arm_scalar_mode_supported_p (machine_mode mode)
23396 if (mode == HFmode)
23397 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23398 else if (ALL_FIXED_POINT_MODE_P (mode))
23399 return true;
23400 else
23401 return default_scalar_mode_supported_p (mode);
23404 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23405 void
23406 neon_reinterpret (rtx dest, rtx src)
23408 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23411 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23412 not to early-clobber SRC registers in the process.
23414 We assume that the operands described by SRC and DEST represent a
23415 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23416 number of components into which the copy has been decomposed. */
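/* An illustrative case (the register numbers are assumptions, not real
   output): suppose the copy has been decomposed into two D-register moves,
   the source living in d1-d2 and the destination in d2-d3.  The operands
   overlap and the destination has the higher register number, so the
   components are emitted in reverse order (d3 <- d2 first, then d2 <- d1);
   copying forwards would clobber d2 before it had been read.  With no
   overlap, or with the destination number lower, forward order is safe.  */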
23417 void
23418 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23420 unsigned int i;
23422 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23423 || REGNO (operands[0]) < REGNO (operands[1]))
23425 for (i = 0; i < count; i++)
23427 operands[2 * i] = dest[i];
23428 operands[2 * i + 1] = src[i];
23431 else
23433 for (i = 0; i < count; i++)
23435 operands[2 * i] = dest[count - i - 1];
23436 operands[2 * i + 1] = src[count - i - 1];
23441 /* Split operands into moves from op[1] + op[2] into op[0]. */
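/* As a sketch (register numbers purely illustrative): a vcombine forming q1
   (i.e. d2/d3) from d4 and d5 is split into the two D-register moves
   d2 <- d4 and d3 <- d5.  If both halves are already in place, nothing but
   a deleted note is emitted, and if they are exactly swapped the two SETs
   are wrapped in a PARALLEL so the vswp pattern can match.  */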
23443 void
23444 neon_split_vcombine (rtx operands[3])
23446 unsigned int dest = REGNO (operands[0]);
23447 unsigned int src1 = REGNO (operands[1]);
23448 unsigned int src2 = REGNO (operands[2]);
23449 machine_mode halfmode = GET_MODE (operands[1]);
23450 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23451 rtx destlo, desthi;
23453 if (src1 == dest && src2 == dest + halfregs)
23455 /* No-op move. Can't split to nothing; emit something. */
23456 emit_note (NOTE_INSN_DELETED);
23457 return;
23460 /* Preserve register attributes for variable tracking. */
23461 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23462 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23463 GET_MODE_SIZE (halfmode));
23465 /* Special case of reversed high/low parts. Use VSWP. */
23466 if (src2 == dest && src1 == dest + halfregs)
23468 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23469 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23470 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23471 return;
23474 if (!reg_overlap_mentioned_p (operands[2], destlo))
23476 /* Try to avoid unnecessary moves if part of the result
23477 is in the right place already. */
23478 if (src1 != dest)
23479 emit_move_insn (destlo, operands[1]);
23480 if (src2 != dest + halfregs)
23481 emit_move_insn (desthi, operands[2]);
23483 else
23485 if (src2 != dest + halfregs)
23486 emit_move_insn (desthi, operands[2]);
23487 if (src1 != dest)
23488 emit_move_insn (destlo, operands[1]);
23492 /* Return the number (counting from 0) of
23493 the least significant set bit in MASK. */
23495 inline static int
23496 number_of_first_bit_set (unsigned mask)
23498 return ctz_hwi (mask);
23501 /* Like emit_multi_reg_push, but allowing for a different set of
23502 registers to be described as saved. MASK is the set of registers
23503 to be saved; REAL_REGS is the set of registers to be described as
23504 saved. If REAL_REGS is 0, only describe the stack adjustment. */
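/* For example (mirroring the high-register prologue case described later in
   this file; the particular registers are illustrative): when high registers
   are staged through low registers, MASK might be {r6, r7} - the registers
   actually named in the PUSH - while REAL_REGS is {r8, r9}, the registers
   whose values those stack slots hold, so the unwind information records r8
   and r9 as the saved registers.  */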
23506 static rtx_insn *
23507 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23509 unsigned long regno;
23510 rtx par[10], tmp, reg;
23511 rtx_insn *insn;
23512 int i, j;
23514 /* Build the parallel of the registers actually being stored. */
23515 for (i = 0; mask; ++i, mask &= mask - 1)
23517 regno = ctz_hwi (mask);
23518 reg = gen_rtx_REG (SImode, regno);
23520 if (i == 0)
23521 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23522 else
23523 tmp = gen_rtx_USE (VOIDmode, reg);
23525 par[i] = tmp;
23528 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23529 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23530 tmp = gen_frame_mem (BLKmode, tmp);
23531 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23532 par[0] = tmp;
23534 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23535 insn = emit_insn (tmp);
23537 /* Always build the stack adjustment note for unwind info. */
23538 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23539 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23540 par[0] = tmp;
23542 /* Build the parallel of the registers recorded as saved for unwind. */
23543 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23545 regno = ctz_hwi (real_regs);
23546 reg = gen_rtx_REG (SImode, regno);
23548 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23549 tmp = gen_frame_mem (SImode, tmp);
23550 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23551 RTX_FRAME_RELATED_P (tmp) = 1;
23552 par[j + 1] = tmp;
23555 if (j == 0)
23556 tmp = par[0];
23557 else
23559 RTX_FRAME_RELATED_P (par[0]) = 1;
23560 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23563 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23565 return insn;
23568 /* Emit code to push or pop registers to or from the stack. F is the
23569 assembly file. MASK is the registers to pop. */
23570 static void
23571 thumb_pop (FILE *f, unsigned long mask)
23573 int regno;
23574 int lo_mask = mask & 0xFF;
23575 int pushed_words = 0;
23577 gcc_assert (mask);
23579 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23581 /* Special case. Do not generate a POP PC statement here, do it in
23582 thumb_exit() */
23583 thumb_exit (f, -1);
23584 return;
23587 fprintf (f, "\tpop\t{");
23589 /* Look at the low registers first. */
23590 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23592 if (lo_mask & 1)
23594 asm_fprintf (f, "%r", regno);
23596 if ((lo_mask & ~1) != 0)
23597 fprintf (f, ", ");
23599 pushed_words++;
23603 if (mask & (1 << PC_REGNUM))
23605 /* Catch popping the PC. */
23606 if (TARGET_INTERWORK || TARGET_BACKTRACE
23607 || crtl->calls_eh_return)
23609 /* The PC is never popped directly; instead
23610 it is popped into r3 and then BX is used. */
23611 fprintf (f, "}\n");
23613 thumb_exit (f, -1);
23615 return;
23617 else
23619 if (mask & 0xFF)
23620 fprintf (f, ", ");
23622 asm_fprintf (f, "%r", PC_REGNUM);
23626 fprintf (f, "}\n");
23629 /* Generate code to return from a thumb function.
23630 If 'reg_containing_return_addr' is -1, then the return address is
23631 actually on the stack, at the stack pointer. */
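/* A rough sketch of the code this can produce (registers illustrative, not
   exhaustive): in the simplest case the return address is popped straight
   into the PC ("pop {pc}"); when interworking, a backtrace structure or an
   ARM-mode entry point rules that out, the address is instead popped into a
   free argument register (r0, say, for a function returning void) and the
   function returns with a "bx" on that register.  */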
23632 static void
23633 thumb_exit (FILE *f, int reg_containing_return_addr)
23635 unsigned regs_available_for_popping;
23636 unsigned regs_to_pop;
23637 int pops_needed;
23638 unsigned available;
23639 unsigned required;
23640 machine_mode mode;
23641 int size;
23642 int restore_a4 = FALSE;
23644 /* Compute the registers we need to pop. */
23645 regs_to_pop = 0;
23646 pops_needed = 0;
23648 if (reg_containing_return_addr == -1)
23650 regs_to_pop |= 1 << LR_REGNUM;
23651 ++pops_needed;
23654 if (TARGET_BACKTRACE)
23656 /* Restore the (ARM) frame pointer and stack pointer. */
23657 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23658 pops_needed += 2;
23661 /* If there is nothing to pop then just emit the BX instruction and
23662 return. */
23663 if (pops_needed == 0)
23665 if (crtl->calls_eh_return)
23666 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23668 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23669 return;
23671 /* Otherwise if we are not supporting interworking and we have not created
23672 a backtrace structure and the function was not entered in ARM mode then
23673 just pop the return address straight into the PC. */
23674 else if (!TARGET_INTERWORK
23675 && !TARGET_BACKTRACE
23676 && !is_called_in_ARM_mode (current_function_decl)
23677 && !crtl->calls_eh_return)
23679 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23680 return;
23683 /* Find out how many of the (return) argument registers we can corrupt. */
23684 regs_available_for_popping = 0;
23686 /* If returning via __builtin_eh_return, the bottom three registers
23687 all contain information needed for the return. */
23688 if (crtl->calls_eh_return)
23689 size = 12;
23690 else
23692 /* We can deduce the registers used from the function's
23693 return value. This is more reliable than examining
23694 df_regs_ever_live_p () because that will be set if the register is
23695 ever used in the function, not just if the register is used
23696 to hold a return value. */
23698 if (crtl->return_rtx != 0)
23699 mode = GET_MODE (crtl->return_rtx);
23700 else
23701 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23703 size = GET_MODE_SIZE (mode);
23705 if (size == 0)
23707 /* In a void function we can use any argument register.
23708 In a function that returns a structure on the stack
23709 we can use the second and third argument registers. */
23710 if (mode == VOIDmode)
23711 regs_available_for_popping =
23712 (1 << ARG_REGISTER (1))
23713 | (1 << ARG_REGISTER (2))
23714 | (1 << ARG_REGISTER (3));
23715 else
23716 regs_available_for_popping =
23717 (1 << ARG_REGISTER (2))
23718 | (1 << ARG_REGISTER (3));
23720 else if (size <= 4)
23721 regs_available_for_popping =
23722 (1 << ARG_REGISTER (2))
23723 | (1 << ARG_REGISTER (3));
23724 else if (size <= 8)
23725 regs_available_for_popping =
23726 (1 << ARG_REGISTER (3));
23729 /* Match registers to be popped with registers into which we pop them. */
23730 for (available = regs_available_for_popping,
23731 required = regs_to_pop;
23732 required != 0 && available != 0;
23733 available &= ~(available & - available),
23734 required &= ~(required & - required))
23735 -- pops_needed;
23737 /* If we have any popping registers left over, remove them. */
23738 if (available > 0)
23739 regs_available_for_popping &= ~available;
23741 /* Otherwise if we need another popping register we can use
23742 the fourth argument register. */
23743 else if (pops_needed)
23745 /* If we have not found any free argument registers and
23746 reg a4 contains the return address, we must move it. */
23747 if (regs_available_for_popping == 0
23748 && reg_containing_return_addr == LAST_ARG_REGNUM)
23750 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23751 reg_containing_return_addr = LR_REGNUM;
23753 else if (size > 12)
23755 /* Register a4 is being used to hold part of the return value,
23756 but we have dire need of a free, low register. */
23757 restore_a4 = TRUE;
23759 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23762 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23764 /* The fourth argument register is available. */
23765 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23767 --pops_needed;
23771 /* Pop as many registers as we can. */
23772 thumb_pop (f, regs_available_for_popping);
23774 /* Process the registers we popped. */
23775 if (reg_containing_return_addr == -1)
23777 /* The return address was popped into the lowest numbered register. */
23778 regs_to_pop &= ~(1 << LR_REGNUM);
23780 reg_containing_return_addr =
23781 number_of_first_bit_set (regs_available_for_popping);
23783 /* Remove this register from the mask of available registers, so that
23784 the return address will not be corrupted by further pops. */
23785 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23788 /* If we popped other registers then handle them here. */
23789 if (regs_available_for_popping)
23791 int frame_pointer;
23793 /* Work out which register currently contains the frame pointer. */
23794 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23796 /* Move it into the correct place. */
23797 asm_fprintf (f, "\tmov\t%r, %r\n",
23798 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23800 /* (Temporarily) remove it from the mask of popped registers. */
23801 regs_available_for_popping &= ~(1 << frame_pointer);
23802 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23804 if (regs_available_for_popping)
23806 int stack_pointer;
23808 /* We popped the stack pointer as well,
23809 find the register that contains it. */
23810 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23812 /* Move it into the stack register. */
23813 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23815 /* At this point we have popped all necessary registers, so
23816 do not worry about restoring regs_available_for_popping
23817 to its correct value:
23819 assert (pops_needed == 0)
23820 assert (regs_available_for_popping == (1 << frame_pointer))
23821 assert (regs_to_pop == (1 << STACK_POINTER)) */
23823 else
23825 /* Since we have just moved the popped value into the frame
23826 pointer, the popping register is available for reuse, and
23827 we know that we still have the stack pointer left to pop. */
23828 regs_available_for_popping |= (1 << frame_pointer);
23832 /* If we still have registers left on the stack, but we no longer have
23833 any registers into which we can pop them, then we must move the return
23834 address into the link register and make available the register that
23835 contained it. */
23836 if (regs_available_for_popping == 0 && pops_needed > 0)
23838 regs_available_for_popping |= 1 << reg_containing_return_addr;
23840 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23841 reg_containing_return_addr);
23843 reg_containing_return_addr = LR_REGNUM;
23846 /* If we have registers left on the stack then pop some more.
23847 We know that at most we will want to pop FP and SP. */
23848 if (pops_needed > 0)
23850 int popped_into;
23851 int move_to;
23853 thumb_pop (f, regs_available_for_popping);
23855 /* We have popped either FP or SP.
23856 Move whichever one it is into the correct register. */
23857 popped_into = number_of_first_bit_set (regs_available_for_popping);
23858 move_to = number_of_first_bit_set (regs_to_pop);
23860 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23862 regs_to_pop &= ~(1 << move_to);
23864 --pops_needed;
23867 /* If we still have not popped everything then we must have only
23868 had one register available to us and we are now popping the SP. */
23869 if (pops_needed > 0)
23871 int popped_into;
23873 thumb_pop (f, regs_available_for_popping);
23875 popped_into = number_of_first_bit_set (regs_available_for_popping);
23877 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23879 /* assert (regs_to_pop == (1 << STACK_POINTER))
23880 assert (pops_needed == 1) */
23884 /* If necessary restore the a4 register. */
23885 if (restore_a4)
23887 if (reg_containing_return_addr != LR_REGNUM)
23889 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23890 reg_containing_return_addr = LR_REGNUM;
23893 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23896 if (crtl->calls_eh_return)
23897 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23899 /* Return to caller. */
23900 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23903 /* Scan INSN just before assembler is output for it.
23904 For Thumb-1, we track the status of the condition codes; this
23905 information is used in the cbranchsi4_insn pattern. */
23906 void
23907 thumb1_final_prescan_insn (rtx_insn *insn)
23909 if (flag_print_asm_name)
23910 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23911 INSN_ADDRESSES (INSN_UID (insn)));
23912 /* Don't overwrite the previous setter when we get to a cbranch. */
23913 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23915 enum attr_conds conds;
23917 if (cfun->machine->thumb1_cc_insn)
23919 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23920 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23921 CC_STATUS_INIT;
23923 conds = get_attr_conds (insn);
23924 if (conds == CONDS_SET)
23926 rtx set = single_set (insn);
23927 cfun->machine->thumb1_cc_insn = insn;
23928 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23929 cfun->machine->thumb1_cc_op1 = const0_rtx;
23930 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23931 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23933 rtx src1 = XEXP (SET_SRC (set), 1);
23934 if (src1 == const0_rtx)
23935 cfun->machine->thumb1_cc_mode = CCmode;
23937 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23939 /* Record the src register operand instead of dest because
23940 cprop_hardreg pass propagates src. */
23941 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23944 else if (conds != CONDS_NOCOND)
23945 cfun->machine->thumb1_cc_insn = NULL_RTX;
23948 /* Check if unexpected far jump is used. */
23949 if (cfun->machine->lr_save_eliminated
23950 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23951 internal_error("Unexpected thumb1 far jump");
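/* Return nonzero if VAL, viewed as a 32-bit value, consists of a single
   8-bit quantity shifted left by 0 to 24 bits - for example 0x4400
   (0x44 << 8) qualifies while 0x101 does not.  Zero itself is rejected.  */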
23954 int
23955 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23957 unsigned HOST_WIDE_INT mask = 0xff;
23958 int i;
23960 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23961 if (val == 0) /* XXX */
23962 return 0;
23964 for (i = 0; i < 25; i++)
23965 if ((val & (mask << i)) == val)
23966 return 1;
23968 return 0;
23971 /* Returns nonzero if the current function contains,
23972 or might contain a far jump. */
23973 static int
23974 thumb_far_jump_used_p (void)
23976 rtx_insn *insn;
23977 bool far_jump = false;
23978 unsigned int func_size = 0;
23980 /* This test is only important for leaf functions. */
23981 /* assert (!leaf_function_p ()); */
23983 /* If we have already decided that far jumps may be used,
23984 do not bother checking again, and always return true even if
23985 it turns out that they are not being used. Once we have made
23986 the decision that far jumps are present (and that hence the link
23987 register will be pushed onto the stack) we cannot go back on it. */
23988 if (cfun->machine->far_jump_used)
23989 return 1;
23991 /* If this function is not being called from the prologue/epilogue
23992 generation code then it must be being called from the
23993 INITIAL_ELIMINATION_OFFSET macro. */
23994 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23996 /* In this case we know that we are being asked about the elimination
23997 of the arg pointer register. If that register is not being used,
23998 then there are no arguments on the stack, and we do not have to
23999 worry that a far jump might force the prologue to push the link
24000 register, changing the stack offsets. In this case we can just
24001 return false, since the presence of far jumps in the function will
24002 not affect stack offsets.
24004 If the arg pointer is live (or if it was live, but has now been
24005 eliminated and so set to dead) then we do have to test to see if
24006 the function might contain a far jump. This test can lead to some
24007 false negatives, since before reload is completed, the length of
24008 branch instructions is not known, so gcc defaults to returning their
24009 longest length, which in turn sets the far jump attribute to true.
24011 A false negative will not result in bad code being generated, but it
24012 will result in a needless push and pop of the link register. We
24013 hope that this does not occur too often.
24015 If we need doubleword stack alignment this could affect the other
24016 elimination offsets so we can't risk getting it wrong. */
24017 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24018 cfun->machine->arg_pointer_live = 1;
24019 else if (!cfun->machine->arg_pointer_live)
24020 return 0;
24023 /* We should not change far_jump_used during or after reload, as there is
24024 no chance to change stack frame layout. */
24025 if (reload_in_progress || reload_completed)
24026 return 0;
24028 /* Check to see if the function contains a branch
24029 insn with the far jump attribute set. */
24030 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24032 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24034 far_jump = true;
24036 func_size += get_attr_length (insn);
24039 /* The far_jump attribute will always be true for thumb1 before the
24040 shorten_branch pass, so checking the far_jump attribute before that
24041 pass isn't very useful.
24043 The following heuristic tries to estimate more accurately whether a far
24044 jump may finally be used. The heuristic is very conservative, as there is
24045 no chance to roll back a decision not to use far jumps.
24047 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24048 that each 2-byte insn is associated with a 4-byte constant pool entry.
24049 Using function size 2048/3 as the threshold is conservative enough. */
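/* Worked example of the threshold (numbers chosen only for illustration): a
   function whose insns total 700 bytes gives 700 * 3 = 2100 >= 2048, so far
   jumps are assumed to be needed, while a 600-byte function
   (1800 < 2048) is not forced down that path.  */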
24050 if (far_jump)
24052 if ((func_size * 3) >= 2048)
24054 /* Record the fact that we have decided that
24055 the function does use far jumps. */
24056 cfun->machine->far_jump_used = 1;
24057 return 1;
24061 return 0;
24064 /* Return nonzero if FUNC must be entered in ARM mode. */
24065 int
24066 is_called_in_ARM_mode (tree func)
24068 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24070 /* Ignore the problem about functions whose address is taken. */
24071 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24072 return TRUE;
24074 #ifdef ARM_PE
24075 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24076 #else
24077 return FALSE;
24078 #endif
24081 /* Given the stack offsets and register mask in OFFSETS, decide how
24082 many additional registers to push instead of subtracting a constant
24083 from SP. For epilogues the principle is the same except we use pop.
24084 FOR_PROLOGUE indicates which we're generating. */
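/* An illustrative case (assuming -Os and that r0 and r1 are free; the exact
   registers come from the loop below): a frame that would otherwise need
   "push {r4, lr}" followed by "sub sp, #8" can be set up with the single
   instruction "push {r0, r1, r4, lr}", the two extra pushes standing in for
   the 8-byte stack adjustment; the epilogue performs the mirror image with
   pop.  */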
24085 static int
24086 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24088 HOST_WIDE_INT amount;
24089 unsigned long live_regs_mask = offsets->saved_regs_mask;
24090 /* Extract a mask of the ones we can give to the Thumb's push/pop
24091 instruction. */
24092 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24093 /* Then count how many other high registers will need to be pushed. */
24094 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24095 int n_free, reg_base, size;
24097 if (!for_prologue && frame_pointer_needed)
24098 amount = offsets->locals_base - offsets->saved_regs;
24099 else
24100 amount = offsets->outgoing_args - offsets->saved_regs;
24102 /* If the stack frame size is 512 exactly, we can save one load
24103 instruction, which should make this a win even when optimizing
24104 for speed. */
24105 if (!optimize_size && amount != 512)
24106 return 0;
24108 /* Can't do this if there are high registers to push. */
24109 if (high_regs_pushed != 0)
24110 return 0;
24112 /* Shouldn't do it in the prologue if no registers would normally
24113 be pushed at all. In the epilogue, also allow it if we'll have
24114 a pop insn for the PC. */
24115 if (l_mask == 0
24116 && (for_prologue
24117 || TARGET_BACKTRACE
24118 || (live_regs_mask & 1 << LR_REGNUM) == 0
24119 || TARGET_INTERWORK
24120 || crtl->args.pretend_args_size != 0))
24121 return 0;
24123 /* Don't do this if thumb_expand_prologue wants to emit instructions
24124 between the push and the stack frame allocation. */
24125 if (for_prologue
24126 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24127 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24128 return 0;
24130 reg_base = 0;
24131 n_free = 0;
24132 if (!for_prologue)
24134 size = arm_size_return_regs ();
24135 reg_base = ARM_NUM_INTS (size);
24136 live_regs_mask >>= reg_base;
24139 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24140 && (for_prologue || call_used_regs[reg_base + n_free]))
24142 live_regs_mask >>= 1;
24143 n_free++;
24146 if (n_free == 0)
24147 return 0;
24148 gcc_assert (amount / 4 * 4 == amount);
24150 if (amount >= 512 && (amount - n_free * 4) < 512)
24151 return (amount - 508) / 4;
24152 if (amount <= n_free * 4)
24153 return amount / 4;
24154 return 0;
24157 /* The bits which aren't usefully expanded as rtl. */
24158 const char *
24159 thumb1_unexpanded_epilogue (void)
24161 arm_stack_offsets *offsets;
24162 int regno;
24163 unsigned long live_regs_mask = 0;
24164 int high_regs_pushed = 0;
24165 int extra_pop;
24166 int had_to_push_lr;
24167 int size;
24169 if (cfun->machine->return_used_this_function != 0)
24170 return "";
24172 if (IS_NAKED (arm_current_func_type ()))
24173 return "";
24175 offsets = arm_get_frame_offsets ();
24176 live_regs_mask = offsets->saved_regs_mask;
24177 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24179 /* We can deduce the registers used from the function's return value.
24180 This is more reliable than examining df_regs_ever_live_p () because that
24181 will be set if the register is ever used in the function, not just if
24182 the register is used to hold a return value. */
24183 size = arm_size_return_regs ();
24185 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24186 if (extra_pop > 0)
24188 unsigned long extra_mask = (1 << extra_pop) - 1;
24189 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24192 /* The prologue may have pushed some high registers to use as
24193 work registers. For example, the testsuite file:
24194 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24195 compiles to produce:
24196 push {r4, r5, r6, r7, lr}
24197 mov r7, r9
24198 mov r6, r8
24199 push {r6, r7}
24200 as part of the prologue. We have to undo that pushing here. */
24202 if (high_regs_pushed)
24204 unsigned long mask = live_regs_mask & 0xff;
24205 int next_hi_reg;
24207 /* The available low registers depend on the size of the value we are
24208 returning. */
24209 if (size <= 12)
24210 mask |= 1 << 3;
24211 if (size <= 8)
24212 mask |= 1 << 2;
24214 if (mask == 0)
24215 /* Oh dear! We have no low registers into which we can pop
24216 high registers! */
24217 internal_error
24218 ("no low registers available for popping high registers");
24220 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24221 if (live_regs_mask & (1 << next_hi_reg))
24222 break;
24224 while (high_regs_pushed)
24226 /* Find lo register(s) into which the high register(s) can
24227 be popped. */
24228 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24230 if (mask & (1 << regno))
24231 high_regs_pushed--;
24232 if (high_regs_pushed == 0)
24233 break;
24236 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24238 /* Pop the values into the low register(s). */
24239 thumb_pop (asm_out_file, mask);
24241 /* Move the value(s) into the high registers. */
24242 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24244 if (mask & (1 << regno))
24246 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24247 regno);
24249 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24250 if (live_regs_mask & (1 << next_hi_reg))
24251 break;
24255 live_regs_mask &= ~0x0f00;
24258 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24259 live_regs_mask &= 0xff;
24261 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24263 /* Pop the return address into the PC. */
24264 if (had_to_push_lr)
24265 live_regs_mask |= 1 << PC_REGNUM;
24267 /* Either no argument registers were pushed or a backtrace
24268 structure was created which includes an adjusted stack
24269 pointer, so just pop everything. */
24270 if (live_regs_mask)
24271 thumb_pop (asm_out_file, live_regs_mask);
24273 /* We have either just popped the return address into the
24274 PC or it was kept in LR for the entire function.
24275 Note that thumb_pop has already called thumb_exit if the
24276 PC was in the list. */
24277 if (!had_to_push_lr)
24278 thumb_exit (asm_out_file, LR_REGNUM);
24280 else
24282 /* Pop everything but the return address. */
24283 if (live_regs_mask)
24284 thumb_pop (asm_out_file, live_regs_mask);
24286 if (had_to_push_lr)
24288 if (size > 12)
24290 /* We have no free low regs, so save one. */
24291 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24292 LAST_ARG_REGNUM);
24295 /* Get the return address into a temporary register. */
24296 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24298 if (size > 12)
24300 /* Move the return address to lr. */
24301 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24302 LAST_ARG_REGNUM);
24303 /* Restore the low register. */
24304 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24305 IP_REGNUM);
24306 regno = LR_REGNUM;
24308 else
24309 regno = LAST_ARG_REGNUM;
24311 else
24312 regno = LR_REGNUM;
24314 /* Remove the argument registers that were pushed onto the stack. */
24315 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24316 SP_REGNUM, SP_REGNUM,
24317 crtl->args.pretend_args_size);
24319 thumb_exit (asm_out_file, regno);
24322 return "";
24325 /* Functions to save and restore machine-specific function data. */
24326 static struct machine_function *
24327 arm_init_machine_status (void)
24329 struct machine_function *machine;
24330 machine = ggc_cleared_alloc<machine_function> ();
24332 #if ARM_FT_UNKNOWN != 0
24333 machine->func_type = ARM_FT_UNKNOWN;
24334 #endif
24335 return machine;
24338 /* Return an RTX indicating where the return address to the
24339 calling function can be found. */
24340 rtx
24341 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24343 if (count != 0)
24344 return NULL_RTX;
24346 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24349 /* Do anything needed before RTL is emitted for each function. */
24350 void
24351 arm_init_expanders (void)
24353 /* Arrange to initialize and mark the machine per-function status. */
24354 init_machine_status = arm_init_machine_status;
24356 /* This is to stop the combine pass optimizing away the alignment
24357 adjustment of va_arg. */
24358 /* ??? It is claimed that this should not be necessary. */
24359 if (cfun)
24360 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24364 /* Like arm_compute_initial_elimination_offset. Simpler because there
24365 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24366 to point at the base of the local variables after static stack
24367 space for a function has been allocated. */
24369 HOST_WIDE_INT
24370 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24372 arm_stack_offsets *offsets;
24374 offsets = arm_get_frame_offsets ();
24376 switch (from)
24378 case ARG_POINTER_REGNUM:
24379 switch (to)
24381 case STACK_POINTER_REGNUM:
24382 return offsets->outgoing_args - offsets->saved_args;
24384 case FRAME_POINTER_REGNUM:
24385 return offsets->soft_frame - offsets->saved_args;
24387 case ARM_HARD_FRAME_POINTER_REGNUM:
24388 return offsets->saved_regs - offsets->saved_args;
24390 case THUMB_HARD_FRAME_POINTER_REGNUM:
24391 return offsets->locals_base - offsets->saved_args;
24393 default:
24394 gcc_unreachable ();
24396 break;
24398 case FRAME_POINTER_REGNUM:
24399 switch (to)
24401 case STACK_POINTER_REGNUM:
24402 return offsets->outgoing_args - offsets->soft_frame;
24404 case ARM_HARD_FRAME_POINTER_REGNUM:
24405 return offsets->saved_regs - offsets->soft_frame;
24407 case THUMB_HARD_FRAME_POINTER_REGNUM:
24408 return offsets->locals_base - offsets->soft_frame;
24410 default:
24411 gcc_unreachable ();
24413 break;
24415 default:
24416 gcc_unreachable ();
24420 /* Generate the function's prologue. */
24422 void
24423 thumb1_expand_prologue (void)
24425 rtx_insn *insn;
24427 HOST_WIDE_INT amount;
24428 arm_stack_offsets *offsets;
24429 unsigned long func_type;
24430 int regno;
24431 unsigned long live_regs_mask;
24432 unsigned long l_mask;
24433 unsigned high_regs_pushed = 0;
24435 func_type = arm_current_func_type ();
24437 /* Naked functions don't have prologues. */
24438 if (IS_NAKED (func_type))
24439 return;
24441 if (IS_INTERRUPT (func_type))
24443 error ("interrupt Service Routines cannot be coded in Thumb mode");
24444 return;
24447 if (is_called_in_ARM_mode (current_function_decl))
24448 emit_insn (gen_prologue_thumb1_interwork ());
24450 offsets = arm_get_frame_offsets ();
24451 live_regs_mask = offsets->saved_regs_mask;
24453 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24454 l_mask = live_regs_mask & 0x40ff;
24455 /* Then count how many other high registers will need to be pushed. */
24456 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24458 if (crtl->args.pretend_args_size)
24460 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24462 if (cfun->machine->uses_anonymous_args)
24464 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24465 unsigned long mask;
24467 mask = 1ul << (LAST_ARG_REGNUM + 1);
24468 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24470 insn = thumb1_emit_multi_reg_push (mask, 0);
24472 else
24474 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24475 stack_pointer_rtx, x));
24477 RTX_FRAME_RELATED_P (insn) = 1;
24480 if (TARGET_BACKTRACE)
24482 HOST_WIDE_INT offset = 0;
24483 unsigned work_register;
24484 rtx work_reg, x, arm_hfp_rtx;
24486 /* We have been asked to create a stack backtrace structure.
24487 The code looks like this:
24489 0 .align 2
24490 0 func:
24491 0 sub SP, #16 Reserve space for 4 registers.
24492 2 push {R7} Push low registers.
24493 4 add R7, SP, #20 Get the stack pointer before the push.
24494 6 str R7, [SP, #8] Store the stack pointer
24495 (before reserving the space).
24496 8 mov R7, PC Get hold of the start of this code + 12.
24497 10 str R7, [SP, #16] Store it.
24498 12 mov R7, FP Get hold of the current frame pointer.
24499 14 str R7, [SP, #4] Store it.
24500 16 mov R7, LR Get hold of the current return address.
24501 18 str R7, [SP, #12] Store it.
24502 20 add R7, SP, #16 Point at the start of the
24503 backtrace structure.
24504 22 mov FP, R7 Put this value into the frame pointer. */
24506 work_register = thumb_find_work_register (live_regs_mask);
24507 work_reg = gen_rtx_REG (SImode, work_register);
24508 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24510 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24511 stack_pointer_rtx, GEN_INT (-16)));
24512 RTX_FRAME_RELATED_P (insn) = 1;
24514 if (l_mask)
24516 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24517 RTX_FRAME_RELATED_P (insn) = 1;
24519 offset = bit_count (l_mask) * UNITS_PER_WORD;
24522 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24523 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24525 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24526 x = gen_frame_mem (SImode, x);
24527 emit_move_insn (x, work_reg);
24529 /* Make sure that the instruction fetching the PC is in the right place
24530 to calculate "start of backtrace creation code + 12". */
24531 /* ??? The stores using the common WORK_REG ought to be enough to
24532 prevent the scheduler from doing anything weird. Failing that
24533 we could always move all of the following into an UNSPEC_VOLATILE. */
24534 if (l_mask)
24536 x = gen_rtx_REG (SImode, PC_REGNUM);
24537 emit_move_insn (work_reg, x);
24539 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24540 x = gen_frame_mem (SImode, x);
24541 emit_move_insn (x, work_reg);
24543 emit_move_insn (work_reg, arm_hfp_rtx);
24545 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24546 x = gen_frame_mem (SImode, x);
24547 emit_move_insn (x, work_reg);
24549 else
24551 emit_move_insn (work_reg, arm_hfp_rtx);
24553 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24554 x = gen_frame_mem (SImode, x);
24555 emit_move_insn (x, work_reg);
24557 x = gen_rtx_REG (SImode, PC_REGNUM);
24558 emit_move_insn (work_reg, x);
24560 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24561 x = gen_frame_mem (SImode, x);
24562 emit_move_insn (x, work_reg);
24565 x = gen_rtx_REG (SImode, LR_REGNUM);
24566 emit_move_insn (work_reg, x);
24568 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24569 x = gen_frame_mem (SImode, x);
24570 emit_move_insn (x, work_reg);
24572 x = GEN_INT (offset + 12);
24573 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24575 emit_move_insn (arm_hfp_rtx, work_reg);
24577 /* Optimization: If we are not pushing any low registers but we are going
24578 to push some high registers then delay our first push. This will just
24579 be a push of LR and we can combine it with the push of the first high
24580 register. */
24581 else if ((l_mask & 0xff) != 0
24582 || (high_regs_pushed == 0 && l_mask))
24584 unsigned long mask = l_mask;
24585 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24586 insn = thumb1_emit_multi_reg_push (mask, mask);
24587 RTX_FRAME_RELATED_P (insn) = 1;
24590 if (high_regs_pushed)
24592 unsigned pushable_regs;
24593 unsigned next_hi_reg;
24594 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24595 : crtl->args.info.nregs;
24596 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24598 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24599 if (live_regs_mask & (1 << next_hi_reg))
24600 break;
24602 /* Here we need to mask out registers used for passing arguments, even
24603 if they could otherwise be pushed; using them to stash the high
24604 registers could clobber argument values that are still live. */
24605 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24607 if (pushable_regs == 0)
24608 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24610 while (high_regs_pushed > 0)
24612 unsigned long real_regs_mask = 0;
24614 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24616 if (pushable_regs & (1 << regno))
24618 emit_move_insn (gen_rtx_REG (SImode, regno),
24619 gen_rtx_REG (SImode, next_hi_reg));
24621 high_regs_pushed --;
24622 real_regs_mask |= (1 << next_hi_reg);
24624 if (high_regs_pushed)
24626 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24627 next_hi_reg --)
24628 if (live_regs_mask & (1 << next_hi_reg))
24629 break;
24631 else
24633 pushable_regs &= ~((1 << regno) - 1);
24634 break;
24639 /* If we had to find a work register and we have not yet
24640 saved the LR then add it to the list of regs to push. */
24641 if (l_mask == (1 << LR_REGNUM))
24643 pushable_regs |= l_mask;
24644 real_regs_mask |= l_mask;
24645 l_mask = 0;
24648 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24649 RTX_FRAME_RELATED_P (insn) = 1;
24653 /* Load the pic register before setting the frame pointer,
24654 so we can use r7 as a temporary work register. */
24655 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24656 arm_load_pic_register (live_regs_mask);
24658 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24659 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24660 stack_pointer_rtx);
24662 if (flag_stack_usage_info)
24663 current_function_static_stack_size
24664 = offsets->outgoing_args - offsets->saved_args;
24666 amount = offsets->outgoing_args - offsets->saved_regs;
24667 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24668 if (amount)
24670 if (amount < 512)
24672 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24673 GEN_INT (- amount)));
24674 RTX_FRAME_RELATED_P (insn) = 1;
24676 else
24678 rtx reg, dwarf;
24680 /* The stack decrement is too big for an immediate value in a single
24681 insn. In theory we could issue multiple subtracts, but after
24682 three of them it becomes more space efficient to place the full
24683 value in the constant pool and load into a register. (Also the
24684 ARM debugger really likes to see only one stack decrement per
24685 function). So instead we look for a scratch register into which
24686 we can load the decrement, and then we subtract this from the
24687 stack pointer. Unfortunately on the thumb the only available
24688 scratch registers are the argument registers, and we cannot use
24689 these as they may hold arguments to the function. Instead we
24690 attempt to locate a call preserved register which is used by this
24691 function. If we can find one, then we know that it will have
24692 been pushed at the start of the prologue and so we can corrupt
24693 it now. */
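/* As an illustration (the constant and register are placeholders): for a
   1024-byte frame with r4 among the call-saved registers already pushed, the
   code below materializes -1024 in r4 (however the movsi pattern chooses to
   do that) and then adds r4 to the stack pointer, instead of emitting a
   chain of immediate subtracts.  */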
24694 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24695 if (live_regs_mask & (1 << regno))
24696 break;
24698 gcc_assert(regno <= LAST_LO_REGNUM);
24700 reg = gen_rtx_REG (SImode, regno);
24702 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24704 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24705 stack_pointer_rtx, reg));
24707 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24708 plus_constant (Pmode, stack_pointer_rtx,
24709 -amount));
24710 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24711 RTX_FRAME_RELATED_P (insn) = 1;
24715 if (frame_pointer_needed)
24716 thumb_set_frame_pointer (offsets);
24718 /* If we are profiling, make sure no instructions are scheduled before
24719 the call to mcount. Similarly if the user has requested no
24720 scheduling in the prolog. Similarly if we want non-call exceptions
24721 using the EABI unwinder, to prevent faulting instructions from being
24722 swapped with a stack adjustment. */
24723 if (crtl->profile || !TARGET_SCHED_PROLOG
24724 || (arm_except_unwind_info (&global_options) == UI_TARGET
24725 && cfun->can_throw_non_call_exceptions))
24726 emit_insn (gen_blockage ());
24728 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24729 if (live_regs_mask & 0xff)
24730 cfun->machine->lr_save_eliminated = 0;
24733 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24734 single POP instruction can be generated. LR should be replaced by PC. All
24735 the checks required are already done by USE_RETURN_INSN (). Hence,
24736 all we really need to check here is whether a single register or
24737 multiple registers are to be popped. */
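/* A sketch of the two shapes this produces (the assembly is shown only as an
   illustration): a function whose sole saved register is LR pops that slot
   straight into the PC, in effect "ldr pc, [sp], #4"; one that saved, say,
   {r4, r5, lr} returns with "pop {r4, r5, pc}".  If simple_return is set, or
   nothing was saved, a plain return is emitted instead.  */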
24738 void
24739 thumb2_expand_return (bool simple_return)
24741 int i, num_regs;
24742 unsigned long saved_regs_mask;
24743 arm_stack_offsets *offsets;
24745 offsets = arm_get_frame_offsets ();
24746 saved_regs_mask = offsets->saved_regs_mask;
24748 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24749 if (saved_regs_mask & (1 << i))
24750 num_regs++;
24752 if (!simple_return && saved_regs_mask)
24754 if (num_regs == 1)
24756 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24757 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24758 rtx addr = gen_rtx_MEM (SImode,
24759 gen_rtx_POST_INC (SImode,
24760 stack_pointer_rtx));
24761 set_mem_alias_set (addr, get_frame_alias_set ());
24762 XVECEXP (par, 0, 0) = ret_rtx;
24763 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24764 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24765 emit_jump_insn (par);
24767 else
24769 saved_regs_mask &= ~ (1 << LR_REGNUM);
24770 saved_regs_mask |= (1 << PC_REGNUM);
24771 arm_emit_multi_reg_pop (saved_regs_mask);
24774 else
24776 emit_jump_insn (simple_return_rtx);
24780 void
24781 thumb1_expand_epilogue (void)
24783 HOST_WIDE_INT amount;
24784 arm_stack_offsets *offsets;
24785 int regno;
24787 /* Naked functions don't have epilogues. */
24788 if (IS_NAKED (arm_current_func_type ()))
24789 return;
24791 offsets = arm_get_frame_offsets ();
24792 amount = offsets->outgoing_args - offsets->saved_regs;
24794 if (frame_pointer_needed)
24796 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24797 amount = offsets->locals_base - offsets->saved_regs;
24799 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24801 gcc_assert (amount >= 0);
24802 if (amount)
24804 emit_insn (gen_blockage ());
24806 if (amount < 512)
24807 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24808 GEN_INT (amount)));
24809 else
24811 /* r3 is always free in the epilogue. */
24812 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24814 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24815 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24819 /* Emit a USE (stack_pointer_rtx), so that
24820 the stack adjustment will not be deleted. */
24821 emit_insn (gen_force_register_use (stack_pointer_rtx));
24823 if (crtl->profile || !TARGET_SCHED_PROLOG)
24824 emit_insn (gen_blockage ());
24826 /* Emit a clobber for each register that will be restored in the epilogue,
24827 so that flow2 will get register lifetimes correct. */
24828 for (regno = 0; regno < 13; regno++)
24829 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24830 emit_clobber (gen_rtx_REG (SImode, regno));
24832 if (! df_regs_ever_live_p (LR_REGNUM))
24833 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24836 /* Epilogue code for APCS frame. */
24837 static void
24838 arm_expand_epilogue_apcs_frame (bool really_return)
24840 unsigned long func_type;
24841 unsigned long saved_regs_mask;
24842 int num_regs = 0;
24843 int i;
24844 int floats_from_frame = 0;
24845 arm_stack_offsets *offsets;
24847 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24848 func_type = arm_current_func_type ();
24850 /* Get frame offsets for ARM. */
24851 offsets = arm_get_frame_offsets ();
24852 saved_regs_mask = offsets->saved_regs_mask;
24854 /* Find the offset of the floating-point save area in the frame. */
24855 floats_from_frame
24856 = (offsets->saved_args
24857 + arm_compute_static_chain_stack_bytes ()
24858 - offsets->frame);
24860 /* Compute how many core registers are saved and how far away the floats are. */
24861 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24862 if (saved_regs_mask & (1 << i))
24864 num_regs++;
24865 floats_from_frame += 4;
24868 if (TARGET_HARD_FLOAT && TARGET_VFP)
24870 int start_reg;
24871 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24873 /* The offset is from IP_REGNUM. */
24874 int saved_size = arm_get_vfp_saved_size ();
24875 if (saved_size > 0)
24877 rtx_insn *insn;
24878 floats_from_frame += saved_size;
24879 insn = emit_insn (gen_addsi3 (ip_rtx,
24880 hard_frame_pointer_rtx,
24881 GEN_INT (-floats_from_frame)));
24882 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24883 ip_rtx, hard_frame_pointer_rtx);
24886 /* Generate VFP register multi-pop. */
24887 start_reg = FIRST_VFP_REGNUM;
24889 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24890 /* Look for a case where a reg does not need restoring. */
24891 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24892 && (!df_regs_ever_live_p (i + 1)
24893 || call_used_regs[i + 1]))
24895 if (start_reg != i)
24896 arm_emit_vfp_multi_reg_pop (start_reg,
24897 (i - start_reg) / 2,
24898 gen_rtx_REG (SImode,
24899 IP_REGNUM));
24900 start_reg = i + 2;
24903 /* Restore the remaining regs that we have discovered (or possibly
24904 even all of them, if the conditional in the for loop never
24905 fired). */
24906 if (start_reg != i)
24907 arm_emit_vfp_multi_reg_pop (start_reg,
24908 (i - start_reg) / 2,
24909 gen_rtx_REG (SImode, IP_REGNUM));
24912 if (TARGET_IWMMXT)
24914 /* The frame pointer is guaranteed to be non-double-word aligned, as
24915 it is set to double-word-aligned old_stack_pointer - 4. */
24916 rtx_insn *insn;
24917 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24919 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24920 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24922 rtx addr = gen_frame_mem (V2SImode,
24923 plus_constant (Pmode, hard_frame_pointer_rtx,
24924 - lrm_count * 4));
24925 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24926 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24927 gen_rtx_REG (V2SImode, i),
24928 NULL_RTX);
24929 lrm_count += 2;
24933 /* saved_regs_mask should contain IP, which holds the old stack pointer
24934 saved when the activation record was created. Since SP and IP are
24935 adjacent registers, we can restore the value directly into SP. */
24936 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24937 saved_regs_mask &= ~(1 << IP_REGNUM);
24938 saved_regs_mask |= (1 << SP_REGNUM);
24940 /* There are two registers left in saved_regs_mask - LR and PC. We
24941 only need to restore LR (the return address), but to
24942 save time we can load it directly into PC, unless we need a
24943 special function exit sequence, or we are not really returning. */
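/* In other words (the register list is illustrative): the final load-multiple
   both restores the saved registers and returns, loading the stacked LR value
   directly into the PC, rather than restoring LR and then branching through
   it with a separate instruction.  */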
24944 if (really_return
24945 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24946 && !crtl->calls_eh_return)
24947 /* Delete LR from the register mask, so that LR on
24948 the stack is loaded into the PC in the register mask. */
24949 saved_regs_mask &= ~(1 << LR_REGNUM);
24950 else
24951 saved_regs_mask &= ~(1 << PC_REGNUM);
24953 num_regs = bit_count (saved_regs_mask);
24954 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24956 rtx_insn *insn;
24957 emit_insn (gen_blockage ());
24958 /* Unwind the stack to just below the saved registers. */
24959 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24960 hard_frame_pointer_rtx,
24961 GEN_INT (- 4 * num_regs)));
24963 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24964 stack_pointer_rtx, hard_frame_pointer_rtx);
24967 arm_emit_multi_reg_pop (saved_regs_mask);
24969 if (IS_INTERRUPT (func_type))
24971 /* Interrupt handlers will have pushed the
24972 IP onto the stack, so restore it now. */
24973 rtx_insn *insn;
24974 rtx addr = gen_rtx_MEM (SImode,
24975 gen_rtx_POST_INC (SImode,
24976 stack_pointer_rtx));
24977 set_mem_alias_set (addr, get_frame_alias_set ());
24978 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24979 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24980 gen_rtx_REG (SImode, IP_REGNUM),
24981 NULL_RTX);
24984 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24985 return;
24987 if (crtl->calls_eh_return)
24988 emit_insn (gen_addsi3 (stack_pointer_rtx,
24989 stack_pointer_rtx,
24990 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24992 if (IS_STACKALIGN (func_type))
24993 /* Restore the original stack pointer. Before prologue, the stack was
24994 realigned and the original stack pointer saved in r0. For details,
24995 see comment in arm_expand_prologue. */
24996 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24998 emit_jump_insn (simple_return_rtx);
25001 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25002 function is not a sibcall. */
25003 void
25004 arm_expand_epilogue (bool really_return)
25006 unsigned long func_type;
25007 unsigned long saved_regs_mask;
25008 int num_regs = 0;
25009 int i;
25010 int amount;
25011 arm_stack_offsets *offsets;
25013 func_type = arm_current_func_type ();
25015 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25016 and let output_return_instruction take care of any instruction emission. */
25017 if (IS_NAKED (func_type)
25018 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25020 if (really_return)
25021 emit_jump_insn (simple_return_rtx);
25022 return;
25025 /* If we are throwing an exception, then we really must be doing a
25026 return, so we can't tail-call. */
25027 gcc_assert (!crtl->calls_eh_return || really_return);
25029 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25031 arm_expand_epilogue_apcs_frame (really_return);
25032 return;
25035 /* Get frame offsets for ARM. */
25036 offsets = arm_get_frame_offsets ();
25037 saved_regs_mask = offsets->saved_regs_mask;
25038 num_regs = bit_count (saved_regs_mask);
25040 if (frame_pointer_needed)
25042 rtx_insn *insn;
25043 /* Restore stack pointer if necessary. */
25044 if (TARGET_ARM)
25046 /* In ARM mode, the frame pointer points to the first saved register.
25047 Restore the stack pointer to the last saved register. */
25048 amount = offsets->frame - offsets->saved_regs;
25050 /* Force out any pending memory operations that reference stacked data
25051 before stack de-allocation occurs. */
25052 emit_insn (gen_blockage ());
25053 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25054 hard_frame_pointer_rtx,
25055 GEN_INT (amount)));
25056 arm_add_cfa_adjust_cfa_note (insn, amount,
25057 stack_pointer_rtx,
25058 hard_frame_pointer_rtx);
25060 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25061 deleted. */
25062 emit_insn (gen_force_register_use (stack_pointer_rtx));
25064 else
25066 /* In Thumb-2 mode, the frame pointer points to the last saved
25067 register. */
25068 amount = offsets->locals_base - offsets->saved_regs;
25069 if (amount)
25071 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25072 hard_frame_pointer_rtx,
25073 GEN_INT (amount)));
25074 arm_add_cfa_adjust_cfa_note (insn, amount,
25075 hard_frame_pointer_rtx,
25076 hard_frame_pointer_rtx);
25079 /* Force out any pending memory operations that reference stacked data
25080 before stack de-allocation occurs. */
25081 emit_insn (gen_blockage ());
25082 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25083 hard_frame_pointer_rtx));
25084 arm_add_cfa_adjust_cfa_note (insn, 0,
25085 stack_pointer_rtx,
25086 hard_frame_pointer_rtx);
25087 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25088 deleted. */
25089 emit_insn (gen_force_register_use (stack_pointer_rtx));
25092 else
25094 /* Pop off outgoing args and local frame to adjust stack pointer to
25095 last saved register. */
25096 amount = offsets->outgoing_args - offsets->saved_regs;
25097 if (amount)
25099 rtx_insn *tmp;
25100 /* Force out any pending memory operations that reference stacked data
25101 before stack de-allocation occurs. */
25102 emit_insn (gen_blockage ());
25103 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25104 stack_pointer_rtx,
25105 GEN_INT (amount)));
25106 arm_add_cfa_adjust_cfa_note (tmp, amount,
25107 stack_pointer_rtx, stack_pointer_rtx);
25108 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25109 not deleted. */
25110 emit_insn (gen_force_register_use (stack_pointer_rtx));
25114 if (TARGET_HARD_FLOAT && TARGET_VFP)
25116 /* Generate VFP register multi-pop. */
25117 int end_reg = LAST_VFP_REGNUM + 1;
25119 /* Scan the registers in reverse order. We need to match
25120 any groupings made in the prologue and generate matching
25121 vldm operations. The need to match groups is because,
25122 unlike pop, vldm can only do consecutive regs. */
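/* For example (register numbers illustrative): if the prologue saved d8-d9
   with one vstm and d11 with another because d10 did not need saving, the
   loop below emits a vldm for d11 when it reaches the d10 gap and a second
   vldm for d8-d9 at the end, rather than one pop covering d8-d11.  */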
25123 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25124 /* Look for a case where a reg does not need restoring. */
25125 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25126 && (!df_regs_ever_live_p (i + 1)
25127 || call_used_regs[i + 1]))
25129 /* Restore the regs discovered so far (from reg+2 to
25130 end_reg). */
25131 if (end_reg > i + 2)
25132 arm_emit_vfp_multi_reg_pop (i + 2,
25133 (end_reg - (i + 2)) / 2,
25134 stack_pointer_rtx);
25135 end_reg = i;
25138 /* Restore the remaining regs that we have discovered (or possibly
25139 even all of them, if the conditional in the for loop never
25140 fired). */
25141 if (end_reg > i + 2)
25142 arm_emit_vfp_multi_reg_pop (i + 2,
25143 (end_reg - (i + 2)) / 2,
25144 stack_pointer_rtx);
25147 if (TARGET_IWMMXT)
25148 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25149 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25151 rtx_insn *insn;
25152 rtx addr = gen_rtx_MEM (V2SImode,
25153 gen_rtx_POST_INC (SImode,
25154 stack_pointer_rtx));
25155 set_mem_alias_set (addr, get_frame_alias_set ());
25156 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25157 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25158 gen_rtx_REG (V2SImode, i),
25159 NULL_RTX);
25160 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25161 stack_pointer_rtx, stack_pointer_rtx);
25164 if (saved_regs_mask)
25166 rtx insn;
25167 bool return_in_pc = false;
25169 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25170 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25171 && !IS_STACKALIGN (func_type)
25172 && really_return
25173 && crtl->args.pretend_args_size == 0
25174 && saved_regs_mask & (1 << LR_REGNUM)
25175 && !crtl->calls_eh_return)
25177 saved_regs_mask &= ~(1 << LR_REGNUM);
25178 saved_regs_mask |= (1 << PC_REGNUM);
25179 return_in_pc = true;
25182 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25184 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25185 if (saved_regs_mask & (1 << i))
25187 rtx addr = gen_rtx_MEM (SImode,
25188 gen_rtx_POST_INC (SImode,
25189 stack_pointer_rtx));
25190 set_mem_alias_set (addr, get_frame_alias_set ());
25192 if (i == PC_REGNUM)
25194 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25195 XVECEXP (insn, 0, 0) = ret_rtx;
25196 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25197 gen_rtx_REG (SImode, i),
25198 addr);
25199 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25200 insn = emit_jump_insn (insn);
25202 else
25204 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25205 addr));
25206 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25207 gen_rtx_REG (SImode, i),
25208 NULL_RTX);
25209 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25210 stack_pointer_rtx,
25211 stack_pointer_rtx);
25215 else
25217 if (TARGET_LDRD
25218 && current_tune->prefer_ldrd_strd
25219 && !optimize_function_for_size_p (cfun))
25221 if (TARGET_THUMB2)
25222 thumb2_emit_ldrd_pop (saved_regs_mask);
25223 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25224 arm_emit_ldrd_pop (saved_regs_mask);
25225 else
25226 arm_emit_multi_reg_pop (saved_regs_mask);
25228 else
25229 arm_emit_multi_reg_pop (saved_regs_mask);
25232 if (return_in_pc)
25233 return;
25236 if (crtl->args.pretend_args_size)
25238 int i, j;
25239 rtx dwarf = NULL_RTX;
25240 rtx_insn *tmp =
25241 emit_insn (gen_addsi3 (stack_pointer_rtx,
25242 stack_pointer_rtx,
25243 GEN_INT (crtl->args.pretend_args_size)));
25245 RTX_FRAME_RELATED_P (tmp) = 1;
25247 if (cfun->machine->uses_anonymous_args)
25249 /* Restore pretend args. See arm_expand_prologue for how the pretend
25250 args are saved on the stack. */
25251 int num_regs = crtl->args.pretend_args_size / 4;
25252 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
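/* Worked example (illustrative, not part of the original source): with
   crtl->args.pretend_args_size == 8, num_regs is 2 and the mask becomes
   (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, the two argument registers
   whose anonymous-argument copies were pushed by the prologue.  */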
25253 for (j = 0, i = 0; j < num_regs; i++)
25254 if (saved_regs_mask & (1 << i))
25256 rtx reg = gen_rtx_REG (SImode, i);
25257 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25258 j++;
25260 REG_NOTES (tmp) = dwarf;
25262 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25263 stack_pointer_rtx, stack_pointer_rtx);
25266 if (!really_return)
25267 return;
25269 if (crtl->calls_eh_return)
25270 emit_insn (gen_addsi3 (stack_pointer_rtx,
25271 stack_pointer_rtx,
25272 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25274 if (IS_STACKALIGN (func_type))
25275 /* Restore the original stack pointer. Before the prologue, the stack was
25276 realigned and the original stack pointer was saved in r0. For details,
25277 see the comment in arm_expand_prologue. */
25278 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25280 emit_jump_insn (simple_return_rtx);
25283 /* Implementation of insn prologue_thumb1_interwork. This is the first
25284 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25286 const char *
25287 thumb1_output_interwork (void)
25289 const char * name;
25290 FILE *f = asm_out_file;
25292 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25293 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25294 == SYMBOL_REF);
25295 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25297 /* Generate code sequence to switch us into Thumb mode. */
25298 /* The .code 32 directive has already been emitted by
25299 ASM_DECLARE_FUNCTION_NAME. */
25300 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25301 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25303 /* Generate a label, so that the debugger will notice the
25304 change in instruction sets. This label is also used by
25305 the assembler to bypass the ARM code when this function
25306 is called from a Thumb encoded function elsewhere in the
25307 same file. Hence the definition of STUB_NAME here must
25308 agree with the definition in gas/config/tc-arm.c. */
25310 #define STUB_NAME ".real_start_of"
25312 fprintf (f, "\t.code\t16\n");
25313 #ifdef ARM_PE
25314 if (arm_dllexport_name_p (name))
25315 name = arm_strip_name_encoding (name);
25316 #endif
25317 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25318 fprintf (f, "\t.thumb_func\n");
25319 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25321 return "";
25324 /* Handle the case of a double word load into a low register from
25325 a computed memory address. The computed address may involve a
25326 register which is overwritten by the load. */
25327 const char *
25328 thumb_load_double_from_address (rtx *operands)
25330 rtx addr;
25331 rtx base;
25332 rtx offset;
25333 rtx arg1;
25334 rtx arg2;
25336 gcc_assert (REG_P (operands[0]));
25337 gcc_assert (MEM_P (operands[1]));
25339 /* Get the memory address. */
25340 addr = XEXP (operands[1], 0);
25342 /* Work out how the memory address is computed. */
25343 switch (GET_CODE (addr))
25345 case REG:
25346 operands[2] = adjust_address (operands[1], SImode, 4);
25348 if (REGNO (operands[0]) == REGNO (addr))
25350 output_asm_insn ("ldr\t%H0, %2", operands);
25351 output_asm_insn ("ldr\t%0, %1", operands);
25353 else
25355 output_asm_insn ("ldr\t%0, %1", operands);
25356 output_asm_insn ("ldr\t%H0, %2", operands);
25358 break;
25360 case CONST:
25361 /* Compute <address> + 4 for the high order load. */
25362 operands[2] = adjust_address (operands[1], SImode, 4);
25364 output_asm_insn ("ldr\t%0, %1", operands);
25365 output_asm_insn ("ldr\t%H0, %2", operands);
25366 break;
25368 case PLUS:
25369 arg1 = XEXP (addr, 0);
25370 arg2 = XEXP (addr, 1);
25372 if (CONSTANT_P (arg1))
25373 base = arg2, offset = arg1;
25374 else
25375 base = arg1, offset = arg2;
25377 gcc_assert (REG_P (base));
25379 /* Catch the case of <address> = <reg> + <reg> */
25380 if (REG_P (offset))
25382 int reg_offset = REGNO (offset);
25383 int reg_base = REGNO (base);
25384 int reg_dest = REGNO (operands[0]);
25386 /* Add the base and offset registers together into the
25387 higher destination register. */
25388 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25389 reg_dest + 1, reg_base, reg_offset);
25391 /* Load the lower destination register from the address in
25392 the higher destination register. */
25393 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25394 reg_dest, reg_dest + 1);
25396 /* Load the higher destination register from its own address
25397 plus 4. */
25398 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25399 reg_dest + 1, reg_dest + 1);
25401 else
25403 /* Compute <address> + 4 for the high order load. */
25404 operands[2] = adjust_address (operands[1], SImode, 4);
25406 /* If the computed address is held in the low order register
25407 then load the high order register first, otherwise always
25408 load the low order register first. */
25409 if (REGNO (operands[0]) == REGNO (base))
25411 output_asm_insn ("ldr\t%H0, %2", operands);
25412 output_asm_insn ("ldr\t%0, %1", operands);
25414 else
25416 output_asm_insn ("ldr\t%0, %1", operands);
25417 output_asm_insn ("ldr\t%H0, %2", operands);
25420 break;
25422 case LABEL_REF:
25423 /* With no registers to worry about we can just load the value
25424 directly. */
25425 operands[2] = adjust_address (operands[1], SImode, 4);
25427 output_asm_insn ("ldr\t%H0, %2", operands);
25428 output_asm_insn ("ldr\t%0, %1", operands);
25429 break;
25431 default:
25432 gcc_unreachable ();
25435 return "";
25438 const char *
25439 thumb_output_move_mem_multiple (int n, rtx *operands)
25441 rtx tmp;
25443 switch (n)
25445 case 2:
25446 if (REGNO (operands[4]) > REGNO (operands[5]))
25448 tmp = operands[4];
25449 operands[4] = operands[5];
25450 operands[5] = tmp;
25452 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25453 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25454 break;
25456 case 3:
25457 if (REGNO (operands[4]) > REGNO (operands[5]))
25458 std::swap (operands[4], operands[5]);
25459 if (REGNO (operands[5]) > REGNO (operands[6]))
25460 std::swap (operands[5], operands[6]);
25461 if (REGNO (operands[4]) > REGNO (operands[5]))
25462 std::swap (operands[4], operands[5]);
25464 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25465 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25466 break;
25468 default:
25469 gcc_unreachable ();
25472 return "";
25475 /* Output a call-via instruction for thumb state. */
25476 const char *
25477 thumb_call_via_reg (rtx reg)
25479 int regno = REGNO (reg);
25480 rtx *labelp;
25482 gcc_assert (regno < LR_REGNUM);
25484 /* If we are in the normal text section we can use a single instance
25485 per compilation unit. If we are doing function sections, then we need
25486 an entry per section, since we can't rely on reachability. */
25487 if (in_section == text_section)
25489 thumb_call_reg_needed = 1;
25491 if (thumb_call_via_label[regno] == NULL)
25492 thumb_call_via_label[regno] = gen_label_rtx ();
25493 labelp = thumb_call_via_label + regno;
25495 else
25497 if (cfun->machine->call_via[regno] == NULL)
25498 cfun->machine->call_via[regno] = gen_label_rtx ();
25499 labelp = cfun->machine->call_via + regno;
25502 output_asm_insn ("bl\t%a0", labelp);
25503 return "";
25506 /* Routines for generating rtl. */
25507 void
25508 thumb_expand_movmemqi (rtx *operands)
25510 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25511 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25512 HOST_WIDE_INT len = INTVAL (operands[2]);
25513 HOST_WIDE_INT offset = 0;
25515 while (len >= 12)
25517 emit_insn (gen_movmem12b (out, in, out, in));
25518 len -= 12;
25521 if (len >= 8)
25523 emit_insn (gen_movmem8b (out, in, out, in));
25524 len -= 8;
25527 if (len >= 4)
25529 rtx reg = gen_reg_rtx (SImode);
25530 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25531 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25532 len -= 4;
25533 offset += 4;
25536 if (len >= 2)
25538 rtx reg = gen_reg_rtx (HImode);
25539 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25540 plus_constant (Pmode, in,
25541 offset))));
25542 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25543 offset)),
25544 reg));
25545 len -= 2;
25546 offset += 2;
25549 if (len)
25551 rtx reg = gen_reg_rtx (QImode);
25552 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25553 plus_constant (Pmode, in,
25554 offset))));
25555 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25556 offset)),
25557 reg));
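/* Worked example (illustrative only): a 23-byte copy is expanded above as
   one 12-byte block move and one 8-byte block move (both of which advance
   the pointers), followed by a halfword copy at offset 0 and a byte copy
   at offset 2 for the remaining 3 bytes.  */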
25561 void
25562 thumb_reload_out_hi (rtx *operands)
25564 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25567 /* Handle reading a half-word from memory during reload. */
25568 void
25569 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25571 gcc_unreachable ();
25574 /* Return the length of a function name prefix
25575 that starts with the character 'c'. */
25576 static int
25577 arm_get_strip_length (int c)
25579 switch (c)
25581 ARM_NAME_ENCODING_LENGTHS
25582 default: return 0;
25586 /* Return a pointer to a function's name with any
25587 and all prefix encodings stripped from it. */
25588 const char *
25589 arm_strip_name_encoding (const char *name)
25591 int skip;
25593 while ((skip = arm_get_strip_length (* name)))
25594 name += skip;
25596 return name;
25599 /* If there is a '*' anywhere in the name's prefix, then
25600 emit the stripped name verbatim, otherwise prepend an
25601 underscore if leading underscores are being used. */
25602 void
25603 arm_asm_output_labelref (FILE *stream, const char *name)
25605 int skip;
25606 int verbatim = 0;
25608 while ((skip = arm_get_strip_length (* name)))
25610 verbatim |= (*name == '*');
25611 name += skip;
25614 if (verbatim)
25615 fputs (name, stream);
25616 else
25617 asm_fprintf (stream, "%U%s", name);
25620 /* This function is used to emit an EABI tag and its associated value.
25621 We emit the numerical value of the tag in case the assembler does not
25622 support textual tags (e.g. gas prior to 2.20). If requested, we include
25623 the tag name in a comment so that anyone reading the assembler output
25624 will know which tag is being set.
25626 This function is not static because arm-c.c needs it too. */
25628 void
25629 arm_emit_eabi_attribute (const char *name, int num, int val)
25631 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25632 if (flag_verbose_asm || flag_debug_asm)
25633 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25634 asm_fprintf (asm_out_file, "\n");
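/* Illustration (a sketch; assumes -O2 together with -fverbose-asm): the
   call arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2)
   would produce a line of the form

       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals

   so the numeric tag is always emitted and the textual name only ever
   appears as an assembler comment.  */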
25637 static void
25638 arm_file_start (void)
25640 int val;
25642 if (TARGET_UNIFIED_ASM)
25643 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25645 if (TARGET_BPABI)
25647 const char *fpu_name;
25648 if (arm_selected_arch)
25650 /* armv7ve doesn't support any extensions. */
25651 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25653 /* Keep backward compatibility for assemblers
25654 which don't support armv7ve. */
25655 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25656 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25657 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25658 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25659 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25661 else
25663 const char* pos = strchr (arm_selected_arch->name, '+');
25664 if (pos)
25666 char buf[15];
25667 gcc_assert (strlen (arm_selected_arch->name)
25668 <= sizeof (buf) / sizeof (*pos));
25669 strncpy (buf, arm_selected_arch->name,
25670 (pos - arm_selected_arch->name) * sizeof (*pos));
25671 buf[pos - arm_selected_arch->name] = '\0';
25672 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25673 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25675 else
25676 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25679 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25680 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25681 else
25683 const char* truncated_name
25684 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25685 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25688 if (TARGET_SOFT_FLOAT)
25690 fpu_name = "softvfp";
25692 else
25694 fpu_name = arm_fpu_desc->name;
25695 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25697 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25698 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25700 if (TARGET_HARD_FLOAT_ABI)
25701 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25704 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25706 /* Some of these attributes only apply when the corresponding features
25707 are used. However we don't have any easy way of figuring this out.
25708 Conservatively record the setting that would have been used. */
25710 if (flag_rounding_math)
25711 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25713 if (!flag_unsafe_math_optimizations)
25715 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25716 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25718 if (flag_signaling_nans)
25719 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25721 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25722 flag_finite_math_only ? 1 : 3);
25724 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25725 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25726 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25727 flag_short_enums ? 1 : 2);
25729 /* Tag_ABI_optimization_goals. */
25730 if (optimize_size)
25731 val = 4;
25732 else if (optimize >= 2)
25733 val = 2;
25734 else if (optimize)
25735 val = 1;
25736 else
25737 val = 6;
25738 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25740 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25741 unaligned_access);
25743 if (arm_fp16_format)
25744 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25745 (int) arm_fp16_format);
25747 if (arm_lang_output_object_attributes_hook)
25748 arm_lang_output_object_attributes_hook();
25751 default_file_start ();
25754 static void
25755 arm_file_end (void)
25757 int regno;
25759 if (NEED_INDICATE_EXEC_STACK)
25760 /* Add .note.GNU-stack. */
25761 file_end_indicate_exec_stack ();
25763 if (! thumb_call_reg_needed)
25764 return;
25766 switch_to_section (text_section);
25767 asm_fprintf (asm_out_file, "\t.code 16\n");
25768 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25770 for (regno = 0; regno < LR_REGNUM; regno++)
25772 rtx label = thumb_call_via_label[regno];
25774 if (label != 0)
25776 targetm.asm_out.internal_label (asm_out_file, "L",
25777 CODE_LABEL_NUMBER (label));
25778 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25783 #ifndef ARM_PE
25784 /* Symbols in the text segment can be accessed without indirecting via the
25785 constant pool; it may take an extra binary operation, but this is still
25786 faster than indirecting via memory. Don't do this when not optimizing,
25787 since we won't be calculating all of the offsets necessary to do this
25788 simplification. */
25790 static void
25791 arm_encode_section_info (tree decl, rtx rtl, int first)
25793 if (optimize > 0 && TREE_CONSTANT (decl))
25794 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25796 default_encode_section_info (decl, rtl, first);
25798 #endif /* !ARM_PE */
25800 static void
25801 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25803 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25804 && !strcmp (prefix, "L"))
25806 arm_ccfsm_state = 0;
25807 arm_target_insn = NULL;
25809 default_internal_label (stream, prefix, labelno);
25812 /* Output code to add DELTA to the first argument, and then jump
25813 to FUNCTION. Used for C++ multiple inheritance. */
25814 static void
25815 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25816 HOST_WIDE_INT delta,
25817 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25818 tree function)
25820 static int thunk_label = 0;
25821 char label[256];
25822 char labelpc[256];
25823 int mi_delta = delta;
25824 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25825 int shift = 0;
25826 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25827 ? 1 : 0);
25828 if (mi_delta < 0)
25829 mi_delta = - mi_delta;
25831 final_start_function (emit_barrier (), file, 1);
25833 if (TARGET_THUMB1)
25835 int labelno = thunk_label++;
25836 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25837 /* Thunks are entered in ARM mode when available. */
25838 if (TARGET_THUMB1_ONLY)
25840 /* push r3 so we can use it as a temporary. */
25841 /* TODO: Omit this save if r3 is not used. */
25842 fputs ("\tpush {r3}\n", file);
25843 fputs ("\tldr\tr3, ", file);
25845 else
25847 fputs ("\tldr\tr12, ", file);
25849 assemble_name (file, label);
25850 fputc ('\n', file);
25851 if (flag_pic)
25853 /* If we are generating PIC, the ldr instruction below loads
25854 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25855 the address of the add + 8, so we have:
25857 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25858 = target + 1.
25860 Note that we have "+ 1" because some versions of GNU ld
25861 don't set the low bit of the result for R_ARM_REL32
25862 relocations against thumb function symbols.
25863 On ARMv6M this is +4, not +8. */
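/* Illustrative check for the TARGET_THUMB1_ONLY path (an addition, not
   part of the original comment): there the constant emitted at the end of
   the thunk is (target - 3) - .LTHUNKPCn and the pc bias is 4, so the add
   below computes
     r3 = (target - 3) - .LTHUNKPCn + (.LTHUNKPCn + 4) = target + 1,
   which again leaves the low bit set for Thumb.  */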
25864 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25865 assemble_name (file, labelpc);
25866 fputs (":\n", file);
25867 if (TARGET_THUMB1_ONLY)
25869 /* This is 2 insns after the start of the thunk, so we know it
25870 is 4-byte aligned. */
25871 fputs ("\tadd\tr3, pc, r3\n", file);
25872 fputs ("\tmov r12, r3\n", file);
25874 else
25875 fputs ("\tadd\tr12, pc, r12\n", file);
25877 else if (TARGET_THUMB1_ONLY)
25878 fputs ("\tmov r12, r3\n", file);
25880 if (TARGET_THUMB1_ONLY)
25882 if (mi_delta > 255)
25884 fputs ("\tldr\tr3, ", file);
25885 assemble_name (file, label);
25886 fputs ("+4\n", file);
25887 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25888 mi_op, this_regno, this_regno);
25890 else if (mi_delta != 0)
25892 /* Thumb1 unified syntax requires s suffix in instruction name when
25893 one of the operands is immediate. */
25894 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25895 mi_op, this_regno, this_regno,
25896 mi_delta);
25899 else
25901 /* TODO: Use movw/movt for large constants when available. */
25902 while (mi_delta != 0)
25904 if ((mi_delta & (3 << shift)) == 0)
25905 shift += 2;
25906 else
25908 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25909 mi_op, this_regno, this_regno,
25910 mi_delta & (0xff << shift));
25911 mi_delta &= ~(0xff << shift);
25912 shift += 8;
25916 if (TARGET_THUMB1)
25918 if (TARGET_THUMB1_ONLY)
25919 fputs ("\tpop\t{r3}\n", file);
25921 fprintf (file, "\tbx\tr12\n");
25922 ASM_OUTPUT_ALIGN (file, 2);
25923 assemble_name (file, label);
25924 fputs (":\n", file);
25925 if (flag_pic)
25927 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25928 rtx tem = XEXP (DECL_RTL (function), 0);
25929 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25930 pipeline offset is four rather than eight. Adjust the offset
25931 accordingly. */
25932 tem = plus_constant (GET_MODE (tem), tem,
25933 TARGET_THUMB1_ONLY ? -3 : -7);
25934 tem = gen_rtx_MINUS (GET_MODE (tem),
25935 tem,
25936 gen_rtx_SYMBOL_REF (Pmode,
25937 ggc_strdup (labelpc)));
25938 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25940 else
25941 /* Output ".word .LTHUNKn". */
25942 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25944 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25945 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25947 else
25949 fputs ("\tb\t", file);
25950 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25951 if (NEED_PLT_RELOC)
25952 fputs ("(PLT)", file);
25953 fputc ('\n', file);
25956 final_end_function ();
25960 arm_emit_vector_const (FILE *file, rtx x)
25962 int i;
25963 const char * pattern;
25965 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25967 switch (GET_MODE (x))
25969 case V2SImode: pattern = "%08x"; break;
25970 case V4HImode: pattern = "%04x"; break;
25971 case V8QImode: pattern = "%02x"; break;
25972 default: gcc_unreachable ();
25975 fprintf (file, "0x");
25976 for (i = CONST_VECTOR_NUNITS (x); i--;)
25978 rtx element;
25980 element = CONST_VECTOR_ELT (x, i);
25981 fprintf (file, pattern, INTVAL (element));
25984 return 1;
25987 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25988 HFmode constant pool entries are actually loaded with ldr. */
25989 void
25990 arm_emit_fp16_const (rtx c)
25992 REAL_VALUE_TYPE r;
25993 long bits;
25995 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25996 bits = real_to_target (NULL, &r, HFmode);
25997 if (WORDS_BIG_ENDIAN)
25998 assemble_zeros (2);
25999 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26000 if (!WORDS_BIG_ENDIAN)
26001 assemble_zeros (2);
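/* Example layout (a sketch, not from the original source): for the HFmode
   constant 1.0, real_to_target yields bits == 0x3c00; on a little-endian
   target the two data bytes are emitted first, then two bytes of zero
   padding, so the 4-byte word later loaded with ldr reads as 0x00003c00.  */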
26004 const char *
26005 arm_output_load_gr (rtx *operands)
26007 rtx reg;
26008 rtx offset;
26009 rtx wcgr;
26010 rtx sum;
26012 if (!MEM_P (operands [1])
26013 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26014 || !REG_P (reg = XEXP (sum, 0))
26015 || !CONST_INT_P (offset = XEXP (sum, 1))
26016 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26017 return "wldrw%?\t%0, %1";
26019 /* Fix up an out-of-range load of a GR register. */
26020 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26021 wcgr = operands[0];
26022 operands[0] = reg;
26023 output_asm_insn ("ldr%?\t%0, %1", operands);
26025 operands[0] = wcgr;
26026 operands[1] = reg;
26027 output_asm_insn ("tmcr%?\t%0, %1", operands);
26028 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26030 return "";
26033 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26035 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26036 named arg and all anonymous args onto the stack.
26037 XXX I know the prologue shouldn't be pushing registers, but it is faster
26038 that way. */
26040 static void
26041 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26042 machine_mode mode,
26043 tree type,
26044 int *pretend_size,
26045 int second_time ATTRIBUTE_UNUSED)
26047 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26048 int nregs;
26050 cfun->machine->uses_anonymous_args = 1;
26051 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26053 nregs = pcum->aapcs_ncrn;
26054 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26055 nregs++;
26057 else
26058 nregs = pcum->nregs;
26060 if (nregs < NUM_ARG_REGS)
26061 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26064 /* We can't rely on the caller doing the proper promotion when
26065 using APCS or ATPCS. */
26067 static bool
26068 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26070 return !TARGET_AAPCS_BASED;
26073 static machine_mode
26074 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26075 machine_mode mode,
26076 int *punsignedp ATTRIBUTE_UNUSED,
26077 const_tree fntype ATTRIBUTE_UNUSED,
26078 int for_return ATTRIBUTE_UNUSED)
26080 if (GET_MODE_CLASS (mode) == MODE_INT
26081 && GET_MODE_SIZE (mode) < 4)
26082 return SImode;
26084 return mode;
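/* Example (illustrative): an argument or return value of type unsigned
   char (QImode, 1 byte) is promoted to SImode here and therefore occupies
   a full 32-bit register, while DImode integers and SFmode floats are
   returned unchanged by this hook.  */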
26087 /* AAPCS based ABIs use short enums by default. */
26089 static bool
26090 arm_default_short_enums (void)
26092 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26096 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26098 static bool
26099 arm_align_anon_bitfield (void)
26101 return TARGET_AAPCS_BASED;
26105 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26107 static tree
26108 arm_cxx_guard_type (void)
26110 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26114 /* The EABI says test the least significant bit of a guard variable. */
26116 static bool
26117 arm_cxx_guard_mask_bit (void)
26119 return TARGET_AAPCS_BASED;
26123 /* The EABI specifies that all array cookies are 8 bytes long. */
26125 static tree
26126 arm_get_cookie_size (tree type)
26128 tree size;
26130 if (!TARGET_AAPCS_BASED)
26131 return default_cxx_get_cookie_size (type);
26133 size = build_int_cst (sizetype, 8);
26134 return size;
26138 /* The EABI says that array cookies should also contain the element size. */
26140 static bool
26141 arm_cookie_has_size (void)
26143 return TARGET_AAPCS_BASED;
26147 /* The EABI says constructors and destructors should return a pointer to
26148 the object constructed/destroyed. */
26150 static bool
26151 arm_cxx_cdtor_returns_this (void)
26153 return TARGET_AAPCS_BASED;
26156 /* The EABI says that an inline function may never be the key
26157 method. */
26159 static bool
26160 arm_cxx_key_method_may_be_inline (void)
26162 return !TARGET_AAPCS_BASED;
26165 static void
26166 arm_cxx_determine_class_data_visibility (tree decl)
26168 if (!TARGET_AAPCS_BASED
26169 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26170 return;
26172 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26173 is exported. However, on systems without dynamic vague linkage,
26174 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26175 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26176 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26177 else
26178 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26179 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26182 static bool
26183 arm_cxx_class_data_always_comdat (void)
26185 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26186 vague linkage if the class has no key function. */
26187 return !TARGET_AAPCS_BASED;
26191 /* The EABI says __aeabi_atexit should be used to register static
26192 destructors. */
26194 static bool
26195 arm_cxx_use_aeabi_atexit (void)
26197 return TARGET_AAPCS_BASED;
26201 void
26202 arm_set_return_address (rtx source, rtx scratch)
26204 arm_stack_offsets *offsets;
26205 HOST_WIDE_INT delta;
26206 rtx addr;
26207 unsigned long saved_regs;
26209 offsets = arm_get_frame_offsets ();
26210 saved_regs = offsets->saved_regs_mask;
26212 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26213 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26214 else
26216 if (frame_pointer_needed)
26217 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26218 else
26220 /* LR will be the first saved register. */
26221 delta = offsets->outgoing_args - (offsets->frame + 4);
26224 if (delta >= 4096)
26226 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26227 GEN_INT (delta & ~4095)));
26228 addr = scratch;
26229 delta &= 4095;
26231 else
26232 addr = stack_pointer_rtx;
26234 addr = plus_constant (Pmode, addr, delta);
26236 /* The store needs to be marked as frame related in order to prevent
26237 DSE from deleting it as dead if it is based on fp. */
26238 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26239 RTX_FRAME_RELATED_P (insn) = 1;
26240 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26245 void
26246 thumb_set_return_address (rtx source, rtx scratch)
26248 arm_stack_offsets *offsets;
26249 HOST_WIDE_INT delta;
26250 HOST_WIDE_INT limit;
26251 int reg;
26252 rtx addr;
26253 unsigned long mask;
26255 emit_use (source);
26257 offsets = arm_get_frame_offsets ();
26258 mask = offsets->saved_regs_mask;
26259 if (mask & (1 << LR_REGNUM))
26261 limit = 1024;
26262 /* Find the saved regs. */
26263 if (frame_pointer_needed)
26265 delta = offsets->soft_frame - offsets->saved_args;
26266 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26267 if (TARGET_THUMB1)
26268 limit = 128;
26270 else
26272 delta = offsets->outgoing_args - offsets->saved_args;
26273 reg = SP_REGNUM;
26275 /* Allow for the stack frame. */
26276 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26277 delta -= 16;
26278 /* The link register is always the first saved register. */
26279 delta -= 4;
26281 /* Construct the address. */
26282 addr = gen_rtx_REG (SImode, reg);
26283 if (delta > limit)
26285 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26286 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26287 addr = scratch;
26289 else
26290 addr = plus_constant (Pmode, addr, delta);
26292 /* The store needs to be marked as frame related in order to prevent
26293 DSE from deleting it as dead if it is based on fp. */
26294 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26295 RTX_FRAME_RELATED_P (insn) = 1;
26296 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26298 else
26299 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26302 /* Implements target hook vector_mode_supported_p. */
26303 bool
26304 arm_vector_mode_supported_p (machine_mode mode)
26306 /* Neon also supports V2SImode, etc. listed in the clause below. */
26307 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26308 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26309 return true;
26311 if ((TARGET_NEON || TARGET_IWMMXT)
26312 && ((mode == V2SImode)
26313 || (mode == V4HImode)
26314 || (mode == V8QImode)))
26315 return true;
26317 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26318 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26319 || mode == V2HAmode))
26320 return true;
26322 return false;
26325 /* Implements target hook array_mode_supported_p. */
26327 static bool
26328 arm_array_mode_supported_p (machine_mode mode,
26329 unsigned HOST_WIDE_INT nelems)
26331 if (TARGET_NEON
26332 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26333 && (nelems >= 2 && nelems <= 4))
26334 return true;
26336 return false;
26339 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26340 registers when autovectorizing for Neon, at least until multiple vector
26341 widths are supported properly by the middle-end. */
26343 static machine_mode
26344 arm_preferred_simd_mode (machine_mode mode)
26346 if (TARGET_NEON)
26347 switch (mode)
26349 case SFmode:
26350 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26351 case SImode:
26352 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26353 case HImode:
26354 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26355 case QImode:
26356 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26357 case DImode:
26358 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26359 return V2DImode;
26360 break;
26362 default:;
26365 if (TARGET_REALLY_IWMMXT)
26366 switch (mode)
26368 case SImode:
26369 return V2SImode;
26370 case HImode:
26371 return V4HImode;
26372 case QImode:
26373 return V8QImode;
26375 default:;
26378 return word_mode;
26381 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26383 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26384 using r0-r4 for function arguments, r7 for the stack frame and don't have
26385 enough left over to do doubleword arithmetic. For Thumb-2 all the
26386 potentially problematic instructions accept high registers so this is not
26387 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26388 that require many low registers. */
26389 static bool
26390 arm_class_likely_spilled_p (reg_class_t rclass)
26392 if ((TARGET_THUMB1 && rclass == LO_REGS)
26393 || rclass == CC_REG)
26394 return true;
26396 return false;
26399 /* Implements target hook small_register_classes_for_mode_p. */
26400 bool
26401 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26403 return TARGET_THUMB1;
26406 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26407 ARM insns and therefore guarantee that the shift count is modulo 256.
26408 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26409 guarantee no particular behavior for out-of-range counts. */
26411 static unsigned HOST_WIDE_INT
26412 arm_shift_truncation_mask (machine_mode mode)
26414 return mode == SImode ? 255 : 0;
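/* Illustrative consequence (an inference, not stated in the original
   comment): because the mask is 255 for SImode, the middle end may assume
   that an SImode shift by 260 behaves like a shift by 260 % 256 == 4,
   whereas no such assumption is made for DImode shift counts.  */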
26418 /* Map internal gcc register numbers to DWARF2 register numbers. */
26420 unsigned int
26421 arm_dbx_register_number (unsigned int regno)
26423 if (regno < 16)
26424 return regno;
26426 if (IS_VFP_REGNUM (regno))
26428 /* See comment in arm_dwarf_register_span. */
26429 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26430 return 64 + regno - FIRST_VFP_REGNUM;
26431 else
26432 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26435 if (IS_IWMMXT_GR_REGNUM (regno))
26436 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26438 if (IS_IWMMXT_REGNUM (regno))
26439 return 112 + regno - FIRST_IWMMXT_REGNUM;
26441 gcc_unreachable ();
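/* Worked examples (illustrative only): s0 (FIRST_VFP_REGNUM) maps to
   DWARF register 64, while d16 (FIRST_VFP_REGNUM + 32) fails
   VFP_REGNO_OK_FOR_SINGLE and maps to 256 + 32 / 2 == 272, inside the
   D0-D31 range described in the comment below.  */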
26444 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26445 GCC models them as 64 32-bit registers, so we need to describe this to
26446 the DWARF generation code. Other registers can use the default. */
26447 static rtx
26448 arm_dwarf_register_span (rtx rtl)
26450 machine_mode mode;
26451 unsigned regno;
26452 rtx parts[16];
26453 int nregs;
26454 int i;
26456 regno = REGNO (rtl);
26457 if (!IS_VFP_REGNUM (regno))
26458 return NULL_RTX;
26460 /* XXX FIXME: The EABI defines two VFP register ranges:
26461 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26462 256-287: D0-D31
26463 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26464 corresponding D register. Until GDB supports this, we shall use the
26465 legacy encodings. We also use these encodings for D0-D15 for
26466 compatibility with older debuggers. */
26467 mode = GET_MODE (rtl);
26468 if (GET_MODE_SIZE (mode) < 8)
26469 return NULL_RTX;
26471 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26473 nregs = GET_MODE_SIZE (mode) / 4;
26474 for (i = 0; i < nregs; i += 2)
26475 if (TARGET_BIG_END)
26477 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26478 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26480 else
26482 parts[i] = gen_rtx_REG (SImode, regno + i);
26483 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26486 else
26488 nregs = GET_MODE_SIZE (mode) / 8;
26489 for (i = 0; i < nregs; i++)
26490 parts[i] = gen_rtx_REG (DImode, regno + i);
26493 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26496 #if ARM_UNWIND_INFO
26497 /* Emit unwind directives for a store-multiple instruction or stack pointer
26498 push during alignment.
26499 These should only ever be generated by the function prologue code, so
26500 expect them to have a particular form.
26501 The store-multiple instruction sometimes pushes pc as the last register,
26502 although it should not be tracked into unwind information, or for -Os
26503 sometimes pushes some dummy registers before the first register that needs
26504 to be tracked in unwind information; such dummy registers are there just
26505 to avoid separate stack adjustment, and will not be restored in the
26506 epilogue. */
26508 static void
26509 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26511 int i;
26512 HOST_WIDE_INT offset;
26513 HOST_WIDE_INT nregs;
26514 int reg_size;
26515 unsigned reg;
26516 unsigned lastreg;
26517 unsigned padfirst = 0, padlast = 0;
26518 rtx e;
26520 e = XVECEXP (p, 0, 0);
26521 gcc_assert (GET_CODE (e) == SET);
26523 /* First insn will adjust the stack pointer. */
26524 gcc_assert (GET_CODE (e) == SET
26525 && REG_P (SET_DEST (e))
26526 && REGNO (SET_DEST (e)) == SP_REGNUM
26527 && GET_CODE (SET_SRC (e)) == PLUS);
26529 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26530 nregs = XVECLEN (p, 0) - 1;
26531 gcc_assert (nregs);
26533 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26534 if (reg < 16)
26536 /* For -Os dummy registers can be pushed at the beginning to
26537 avoid separate stack pointer adjustment. */
26538 e = XVECEXP (p, 0, 1);
26539 e = XEXP (SET_DEST (e), 0);
26540 if (GET_CODE (e) == PLUS)
26541 padfirst = INTVAL (XEXP (e, 1));
26542 gcc_assert (padfirst == 0 || optimize_size);
26543 /* The function prologue may also push pc, but not annotate it as it is
26544 never restored. We turn this into a stack pointer adjustment. */
26545 e = XVECEXP (p, 0, nregs);
26546 e = XEXP (SET_DEST (e), 0);
26547 if (GET_CODE (e) == PLUS)
26548 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26549 else
26550 padlast = offset - 4;
26551 gcc_assert (padlast == 0 || padlast == 4);
26552 if (padlast == 4)
26553 fprintf (asm_out_file, "\t.pad #4\n");
26554 reg_size = 4;
26555 fprintf (asm_out_file, "\t.save {");
26557 else if (IS_VFP_REGNUM (reg))
26559 reg_size = 8;
26560 fprintf (asm_out_file, "\t.vsave {");
26562 else
26563 /* Unknown register type. */
26564 gcc_unreachable ();
26566 /* If the stack increment doesn't match the size of the saved registers,
26567 something has gone horribly wrong. */
26568 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26570 offset = padfirst;
26571 lastreg = 0;
26572 /* The remaining insns will describe the stores. */
26573 for (i = 1; i <= nregs; i++)
26575 /* Expect (set (mem <addr>) (reg)).
26576 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26577 e = XVECEXP (p, 0, i);
26578 gcc_assert (GET_CODE (e) == SET
26579 && MEM_P (SET_DEST (e))
26580 && REG_P (SET_SRC (e)));
26582 reg = REGNO (SET_SRC (e));
26583 gcc_assert (reg >= lastreg);
26585 if (i != 1)
26586 fprintf (asm_out_file, ", ");
26587 /* We can't use %r for vfp because we need to use the
26588 double precision register names. */
26589 if (IS_VFP_REGNUM (reg))
26590 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26591 else
26592 asm_fprintf (asm_out_file, "%r", reg);
26594 #ifdef ENABLE_CHECKING
26595 /* Check that the addresses are consecutive. */
26596 e = XEXP (SET_DEST (e), 0);
26597 if (GET_CODE (e) == PLUS)
26598 gcc_assert (REG_P (XEXP (e, 0))
26599 && REGNO (XEXP (e, 0)) == SP_REGNUM
26600 && CONST_INT_P (XEXP (e, 1))
26601 && offset == INTVAL (XEXP (e, 1)));
26602 else
26603 gcc_assert (i == 1
26604 && REG_P (e)
26605 && REGNO (e) == SP_REGNUM);
26606 offset += reg_size;
26607 #endif
26609 fprintf (asm_out_file, "}\n");
26610 if (padfirst)
26611 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26614 /* Emit unwind directives for a SET. */
26616 static void
26617 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26619 rtx e0;
26620 rtx e1;
26621 unsigned reg;
26623 e0 = XEXP (p, 0);
26624 e1 = XEXP (p, 1);
26625 switch (GET_CODE (e0))
26627 case MEM:
26628 /* Pushing a single register. */
26629 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26630 || !REG_P (XEXP (XEXP (e0, 0), 0))
26631 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26632 abort ();
26634 asm_fprintf (asm_out_file, "\t.save ");
26635 if (IS_VFP_REGNUM (REGNO (e1)))
26636 asm_fprintf(asm_out_file, "{d%d}\n",
26637 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26638 else
26639 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26640 break;
26642 case REG:
26643 if (REGNO (e0) == SP_REGNUM)
26645 /* A stack increment. */
26646 if (GET_CODE (e1) != PLUS
26647 || !REG_P (XEXP (e1, 0))
26648 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26649 || !CONST_INT_P (XEXP (e1, 1)))
26650 abort ();
26652 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26653 -INTVAL (XEXP (e1, 1)));
26655 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26657 HOST_WIDE_INT offset;
26659 if (GET_CODE (e1) == PLUS)
26661 if (!REG_P (XEXP (e1, 0))
26662 || !CONST_INT_P (XEXP (e1, 1)))
26663 abort ();
26664 reg = REGNO (XEXP (e1, 0));
26665 offset = INTVAL (XEXP (e1, 1));
26666 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26667 HARD_FRAME_POINTER_REGNUM, reg,
26668 offset);
26670 else if (REG_P (e1))
26672 reg = REGNO (e1);
26673 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26674 HARD_FRAME_POINTER_REGNUM, reg);
26676 else
26677 abort ();
26679 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26681 /* Move from sp to reg. */
26682 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26684 else if (GET_CODE (e1) == PLUS
26685 && REG_P (XEXP (e1, 0))
26686 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26687 && CONST_INT_P (XEXP (e1, 1)))
26689 /* Set reg to offset from sp. */
26690 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26691 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26693 else
26694 abort ();
26695 break;
26697 default:
26698 abort ();
26703 /* Emit unwind directives for the given insn. */
26705 static void
26706 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26708 rtx note, pat;
26709 bool handled_one = false;
26711 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26712 return;
26714 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26715 && (TREE_NOTHROW (current_function_decl)
26716 || crtl->all_throwers_are_sibcalls))
26717 return;
26719 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26720 return;
26722 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26724 switch (REG_NOTE_KIND (note))
26726 case REG_FRAME_RELATED_EXPR:
26727 pat = XEXP (note, 0);
26728 goto found;
26730 case REG_CFA_REGISTER:
26731 pat = XEXP (note, 0);
26732 if (pat == NULL)
26734 pat = PATTERN (insn);
26735 if (GET_CODE (pat) == PARALLEL)
26736 pat = XVECEXP (pat, 0, 0);
26739 /* Only emitted for IS_STACKALIGN re-alignment. */
26741 rtx dest, src;
26742 unsigned reg;
26744 src = SET_SRC (pat);
26745 dest = SET_DEST (pat);
26747 gcc_assert (src == stack_pointer_rtx);
26748 reg = REGNO (dest);
26749 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26750 reg + 0x90, reg);
26752 handled_one = true;
26753 break;
26755 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26756 to get correct DWARF information for shrink-wrapping. We should not
26757 emit unwind information for it because these notes are used either for
26758 pretend arguments or to adjust sp and restore registers from the
26759 stack. */
26760 case REG_CFA_DEF_CFA:
26761 case REG_CFA_ADJUST_CFA:
26762 case REG_CFA_RESTORE:
26763 return;
26765 case REG_CFA_EXPRESSION:
26766 case REG_CFA_OFFSET:
26767 /* ??? Only handling here what we actually emit. */
26768 gcc_unreachable ();
26770 default:
26771 break;
26774 if (handled_one)
26775 return;
26776 pat = PATTERN (insn);
26777 found:
26779 switch (GET_CODE (pat))
26781 case SET:
26782 arm_unwind_emit_set (asm_out_file, pat);
26783 break;
26785 case SEQUENCE:
26786 /* Store multiple. */
26787 arm_unwind_emit_sequence (asm_out_file, pat);
26788 break;
26790 default:
26791 abort();
26796 /* Output a reference from a function exception table to the type_info
26797 object X. The EABI specifies that the symbol should be relocated by
26798 an R_ARM_TARGET2 relocation. */
26800 static bool
26801 arm_output_ttype (rtx x)
26803 fputs ("\t.word\t", asm_out_file);
26804 output_addr_const (asm_out_file, x);
26805 /* Use special relocations for symbol references. */
26806 if (!CONST_INT_P (x))
26807 fputs ("(TARGET2)", asm_out_file);
26808 fputc ('\n', asm_out_file);
26810 return TRUE;
26813 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26815 static void
26816 arm_asm_emit_except_personality (rtx personality)
26818 fputs ("\t.personality\t", asm_out_file);
26819 output_addr_const (asm_out_file, personality);
26820 fputc ('\n', asm_out_file);
26823 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26825 static void
26826 arm_asm_init_sections (void)
26828 exception_section = get_unnamed_section (0, output_section_asm_op,
26829 "\t.handlerdata");
26831 #endif /* ARM_UNWIND_INFO */
26833 /* Output unwind directives for the start/end of a function. */
26835 void
26836 arm_output_fn_unwind (FILE * f, bool prologue)
26838 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26839 return;
26841 if (prologue)
26842 fputs ("\t.fnstart\n", f);
26843 else
26845 /* If this function will never be unwound, then mark it as such.
26846 The same condition is used in arm_unwind_emit to suppress
26847 the frame annotations. */
26848 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26849 && (TREE_NOTHROW (current_function_decl)
26850 || crtl->all_throwers_are_sibcalls))
26851 fputs("\t.cantunwind\n", f);
26853 fputs ("\t.fnend\n", f);
26857 static bool
26858 arm_emit_tls_decoration (FILE *fp, rtx x)
26860 enum tls_reloc reloc;
26861 rtx val;
26863 val = XVECEXP (x, 0, 0);
26864 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26866 output_addr_const (fp, val);
26868 switch (reloc)
26870 case TLS_GD32:
26871 fputs ("(tlsgd)", fp);
26872 break;
26873 case TLS_LDM32:
26874 fputs ("(tlsldm)", fp);
26875 break;
26876 case TLS_LDO32:
26877 fputs ("(tlsldo)", fp);
26878 break;
26879 case TLS_IE32:
26880 fputs ("(gottpoff)", fp);
26881 break;
26882 case TLS_LE32:
26883 fputs ("(tpoff)", fp);
26884 break;
26885 case TLS_DESCSEQ:
26886 fputs ("(tlsdesc)", fp);
26887 break;
26888 default:
26889 gcc_unreachable ();
26892 switch (reloc)
26894 case TLS_GD32:
26895 case TLS_LDM32:
26896 case TLS_IE32:
26897 case TLS_DESCSEQ:
26898 fputs (" + (. - ", fp);
26899 output_addr_const (fp, XVECEXP (x, 0, 2));
26900 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26901 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26902 output_addr_const (fp, XVECEXP (x, 0, 3));
26903 fputc (')', fp);
26904 break;
26905 default:
26906 break;
26909 return TRUE;
26912 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26914 static void
26915 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26917 gcc_assert (size == 4);
26918 fputs ("\t.word\t", file);
26919 output_addr_const (file, x);
26920 fputs ("(tlsldo)", file);
26923 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26925 static bool
26926 arm_output_addr_const_extra (FILE *fp, rtx x)
26928 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26929 return arm_emit_tls_decoration (fp, x);
26930 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26932 char label[256];
26933 int labelno = INTVAL (XVECEXP (x, 0, 0));
26935 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26936 assemble_name_raw (fp, label);
26938 return TRUE;
26940 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26942 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26943 if (GOT_PCREL)
26944 fputs ("+.", fp);
26945 fputs ("-(", fp);
26946 output_addr_const (fp, XVECEXP (x, 0, 0));
26947 fputc (')', fp);
26948 return TRUE;
26950 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26952 output_addr_const (fp, XVECEXP (x, 0, 0));
26953 if (GOT_PCREL)
26954 fputs ("+.", fp);
26955 fputs ("-(", fp);
26956 output_addr_const (fp, XVECEXP (x, 0, 1));
26957 fputc (')', fp);
26958 return TRUE;
26960 else if (GET_CODE (x) == CONST_VECTOR)
26961 return arm_emit_vector_const (fp, x);
26963 return FALSE;
26966 /* Output assembly for a shift instruction.
26967 SET_FLAGS determines how the instruction modifies the condition codes.
26968 0 - Do not set condition codes.
26969 1 - Set condition codes.
26970 2 - Use smallest instruction. */
26971 const char *
26972 arm_output_shift(rtx * operands, int set_flags)
26974 char pattern[100];
26975 static const char flag_chars[3] = {'?', '.', '!'};
26976 const char *shift;
26977 HOST_WIDE_INT val;
26978 char c;
26980 c = flag_chars[set_flags];
26981 if (TARGET_UNIFIED_ASM)
26983 shift = shift_op(operands[3], &val);
26984 if (shift)
26986 if (val != -1)
26987 operands[2] = GEN_INT(val);
26988 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26990 else
26991 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26993 else
26994 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26995 output_asm_insn (pattern, operands);
26996 return "";
26999 /* Output assembly for a WMMX immediate shift instruction. */
27000 const char *
27001 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27003 int shift = INTVAL (operands[2]);
27004 char templ[50];
27005 machine_mode opmode = GET_MODE (operands[0]);
27007 gcc_assert (shift >= 0);
27009 /* If the shift value is larger than the mode allows (more than 63 for
27010 the D qualifier, 31 for W, or 15 for H), handle it specially below. */
27011 if (((opmode == V4HImode) && (shift > 15))
27012 || ((opmode == V2SImode) && (shift > 31))
27013 || ((opmode == DImode) && (shift > 63)))
27015 if (wror_or_wsra)
27017 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27018 output_asm_insn (templ, operands);
27019 if (opmode == DImode)
27021 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27022 output_asm_insn (templ, operands);
27025 else
27027 /* The destination register will contain all zeros. */
27028 sprintf (templ, "wzero\t%%0");
27029 output_asm_insn (templ, operands);
27031 return "";
27034 if ((opmode == DImode) && (shift > 32))
27036 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27037 output_asm_insn (templ, operands);
27038 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27039 output_asm_insn (templ, operands);
27041 else
27043 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27044 output_asm_insn (templ, operands);
27046 return "";
27049 /* Output assembly for a WMMX tinsr instruction. */
27050 const char *
27051 arm_output_iwmmxt_tinsr (rtx *operands)
27053 int mask = INTVAL (operands[3]);
27054 int i;
27055 char templ[50];
27056 int units = mode_nunits[GET_MODE (operands[0])];
27057 gcc_assert ((mask & (mask - 1)) == 0);
27058 for (i = 0; i < units; ++i)
27060 if ((mask & 0x01) == 1)
27062 break;
27064 mask >>= 1;
27066 gcc_assert (i < units);
27068 switch (GET_MODE (operands[0]))
27070 case V8QImode:
27071 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27072 break;
27073 case V4HImode:
27074 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27075 break;
27076 case V2SImode:
27077 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27078 break;
27079 default:
27080 gcc_unreachable ();
27081 break;
27083 output_asm_insn (templ, operands);
27085 return "";
27088 /* Output a Thumb-1 casesi dispatch sequence. */
27089 const char *
27090 thumb1_output_casesi (rtx *operands)
27092 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27094 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27096 switch (GET_MODE(diff_vec))
27098 case QImode:
27099 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27100 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27101 case HImode:
27102 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27103 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27104 case SImode:
27105 return "bl\t%___gnu_thumb1_case_si";
27106 default:
27107 gcc_unreachable ();
27111 /* Output a Thumb-2 casesi instruction. */
27112 const char *
27113 thumb2_output_casesi (rtx *operands)
27115 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27117 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27119 output_asm_insn ("cmp\t%0, %1", operands);
27120 output_asm_insn ("bhi\t%l3", operands);
27121 switch (GET_MODE(diff_vec))
27123 case QImode:
27124 return "tbb\t[%|pc, %0]";
27125 case HImode:
27126 return "tbh\t[%|pc, %0, lsl #1]";
27127 case SImode:
27128 if (flag_pic)
27130 output_asm_insn ("adr\t%4, %l2", operands);
27131 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27132 output_asm_insn ("add\t%4, %4, %5", operands);
27133 return "bx\t%4";
27135 else
27137 output_asm_insn ("adr\t%4, %l2", operands);
27138 return "ldr\t%|pc, [%4, %0, lsl #2]";
27140 default:
27141 gcc_unreachable ();
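/* Illustration (a sketch; the actual operands depend on the insn): for a
   QImode dispatch table the code above emits roughly

       cmp   <index>, <bound>
       bhi   <default label>
       tbb   [pc, <index>]

   with tbh, or an ldr into pc, used instead for the wider table modes
   handled by the switch above.  */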
27145 /* Most ARM cores are single issue, but some newer ones can dual issue.
27146 The scheduler descriptions rely on this being correct. */
27147 static int
27148 arm_issue_rate (void)
27150 switch (arm_tune)
27152 case xgene1:
27153 return 4;
27155 case cortexa15:
27156 case cortexa57:
27157 return 3;
27159 case cortexm7:
27160 case cortexr4:
27161 case cortexr4f:
27162 case cortexr5:
27163 case genericv7a:
27164 case cortexa5:
27165 case cortexa7:
27166 case cortexa8:
27167 case cortexa9:
27168 case cortexa12:
27169 case cortexa17:
27170 case cortexa53:
27171 case fa726te:
27172 case marvell_pj4:
27173 return 2;
27175 default:
27176 return 1;
27180 /* Return how many instructions the scheduler should look ahead in order
27181 to choose the best one. */
27182 static int
27183 arm_first_cycle_multipass_dfa_lookahead (void)
27185 int issue_rate = arm_issue_rate ();
27187 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27190 /* Enable modeling of L2 auto-prefetcher. */
27191 static int
27192 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27194 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27197 const char *
27198 arm_mangle_type (const_tree type)
27200 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27201 has to be mangled as if it is in the "std" namespace. */
27202 if (TARGET_AAPCS_BASED
27203 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27204 return "St9__va_list";
27206 /* Half-precision float. */
27207 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27208 return "Dh";
27210 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27211 builtin type. */
27212 if (TYPE_NAME (type) != NULL)
27213 return arm_mangle_builtin_type (type);
27215 /* Use the default mangling. */
27216 return NULL;
27219 /* Order of allocation of core registers for Thumb: this allocation is
27220 written over the corresponding initial entries of the array
27221 initialized with REG_ALLOC_ORDER. We allocate all low registers
27222 first. Saving and restoring a low register is usually cheaper than
27223 using a call-clobbered high register. */
27225 static const int thumb_core_reg_alloc_order[] =
27227 3, 2, 1, 0, 4, 5, 6, 7,
27228 14, 12, 8, 9, 10, 11
27231 /* Adjust register allocation order when compiling for Thumb. */
27233 void
27234 arm_order_regs_for_local_alloc (void)
27236 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27237 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27238 if (TARGET_THUMB)
27239 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27240 sizeof (thumb_core_reg_alloc_order));
27243 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27245 bool
27246 arm_frame_pointer_required (void)
27248 return (cfun->has_nonlocal_label
27249 || SUBTARGET_FRAME_POINTER_REQUIRED
27250 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27253 /* Only Thumb-1 lacks conditional execution, so return true if the
27254 target is not Thumb-1. */
27255 static bool
27256 arm_have_conditional_execution (void)
27258 return !TARGET_THUMB1;
27261 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27262 static HOST_WIDE_INT
27263 arm_vector_alignment (const_tree type)
27265 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27267 if (TARGET_AAPCS_BASED)
27268 align = MIN (align, 64);
27270 return align;
27273 static unsigned int
27274 arm_autovectorize_vector_sizes (void)
27276 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27279 static bool
27280 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27282 /* Vectors which aren't in packed structures will not be less aligned than
27283 the natural alignment of their element type, so this is safe. */
27284 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27285 return !is_packed;
27287 return default_builtin_vector_alignment_reachable (type, is_packed);
27290 static bool
27291 arm_builtin_support_vector_misalignment (machine_mode mode,
27292 const_tree type, int misalignment,
27293 bool is_packed)
27295 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27297 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27299 if (is_packed)
27300 return align == 1;
27302 /* If the misalignment is unknown, we should be able to handle the access
27303 so long as it is not to a member of a packed data structure. */
27304 if (misalignment == -1)
27305 return true;
27307 /* Return true if the misalignment is a multiple of the natural alignment
27308 of the vector's element type. This is probably always going to be
27309 true in practice, since we've already established that this isn't a
27310 packed access. */
27311 return ((misalignment % align) == 0);
27314 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27315 is_packed);
27318 static void
27319 arm_conditional_register_usage (void)
27321 int regno;
27323 if (TARGET_THUMB1 && optimize_size)
27325 /* When optimizing for size on Thumb-1, it's better not
27326 to use the HI regs, because of the overhead of
27327 stacking them. */
27328 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27329 fixed_regs[regno] = call_used_regs[regno] = 1;
27332 /* The link register can be clobbered by any branch insn,
27333 but we have no way to track that at present, so mark
27334 it as unavailable. */
27335 if (TARGET_THUMB1)
27336 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27338 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27340 /* VFPv3 registers are disabled when earlier VFP
27341 versions are selected due to the definition of
27342 LAST_VFP_REGNUM. */
27343 for (regno = FIRST_VFP_REGNUM;
27344 regno <= LAST_VFP_REGNUM; ++ regno)
27346 fixed_regs[regno] = 0;
27347 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27348 || regno >= FIRST_VFP_REGNUM + 32;
27352 if (TARGET_REALLY_IWMMXT)
27354 regno = FIRST_IWMMXT_GR_REGNUM;
27355 /* The 2002/10/09 revision of the XScale ABI has wCG0
27356 and wCG1 as call-preserved registers. The 2002/11/21
27357 revision changed this so that all wCG registers are
27358 scratch registers. */
27359 for (regno = FIRST_IWMMXT_GR_REGNUM;
27360 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27361 fixed_regs[regno] = 0;
27362 /* The XScale ABI has wR0 - wR9 as scratch registers,
27363 the rest as call-preserved registers. */
27364 for (regno = FIRST_IWMMXT_REGNUM;
27365 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27367 fixed_regs[regno] = 0;
27368 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27372 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27374 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27375 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27377 else if (TARGET_APCS_STACK)
27379 fixed_regs[10] = 1;
27380 call_used_regs[10] = 1;
27382 /* -mcaller-super-interworking reserves r11 for calls to
27383 _interwork_r11_call_via_rN(). Making the register global
27384 is an easy way of ensuring that it remains valid for all
27385 calls. */
27386 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27387 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27389 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27390 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27391 if (TARGET_CALLER_INTERWORKING)
27392 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27394 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27397 static reg_class_t
27398 arm_preferred_rename_class (reg_class_t rclass)
27400 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27401 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27402 and code size can be reduced. */
27403 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27404 return LO_REGS;
27405 else
27406 return NO_REGS;
27409 /* Compute the attribute "length" of insn "*push_multi".
27410 So this function MUST be kept in sync with that insn pattern. */
27411 int
27412 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
27414 int i, regno, hi_reg;
27415 int num_saves = XVECLEN (parallel_op, 0);
27417 /* ARM mode. */
27418 if (TARGET_ARM)
27419 return 4;
27420 /* Thumb1 mode. */
27421 if (TARGET_THUMB1)
27422 return 2;
27424 /* Thumb2 mode. */
27425 regno = REGNO (first_op);
27426 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27427 for (i = 1; i < num_saves && !hi_reg; i++)
27429 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27430 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27433 if (!hi_reg)
27434 return 2;
27435 return 4;
27438 /* Compute the number of instructions emitted by output_move_double. */
27439 int
27440 arm_count_output_move_double_insns (rtx *operands)
27442 int count;
27443 rtx ops[2];
27444 /* output_move_double may modify the operands array, so call it
27445 here on a copy of the array. */
27446 ops[0] = operands[0];
27447 ops[1] = operands[1];
27448 output_move_double (ops, false, &count);
27449 return count;
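/* If OPERAND is a CONST_DOUBLE whose reciprocal is an exact power of two
   (for example 0.125, whose reciprocal is 8 == 1 << 3), return the number
   of fraction bits, 3 in that example; otherwise return 0.  This is the
   form of constant used by the VFPv3 fixed-point conversion patterns.  */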
27452 int
27453 vfp3_const_double_for_fract_bits (rtx operand)
27455 REAL_VALUE_TYPE r0;
27457 if (!CONST_DOUBLE_P (operand))
27458 return 0;
27460 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27461 if (exact_real_inverse (DFmode, &r0))
27463 if (exact_real_truncate (DFmode, &r0))
27465 HOST_WIDE_INT value = real_to_integer (&r0);
27466 value = value & 0xffffffff;
27467 if ((value != 0) && ( (value & (value - 1)) == 0))
27468 return int_log2 (value);
27471 return 0;
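/* Like vfp3_const_double_for_fract_bits, but for a constant that is itself
   an exact power of two: return log2 of the value (for example 3 for 8.0),
   or 0 if OPERAND is not such a constant.  */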
27474 int
27475 vfp3_const_double_for_bits (rtx operand)
27477 REAL_VALUE_TYPE r0;
27479 if (!CONST_DOUBLE_P (operand))
27480 return 0;
27482 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27483 if (exact_real_truncate (DFmode, &r0))
27485 HOST_WIDE_INT value = real_to_integer (&r0);
27486 value = value & 0xffffffff;
27487 if ((value != 0) && ( (value & (value - 1)) == 0))
27488 return int_log2 (value);
27491 return 0;
27494 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27496 static void
27497 arm_pre_atomic_barrier (enum memmodel model)
27499 if (need_atomic_barrier_p (model, true))
27500 emit_insn (gen_memory_barrier ());
27503 static void
27504 arm_post_atomic_barrier (enum memmodel model)
27506 if (need_atomic_barrier_p (model, false))
27507 emit_insn (gen_memory_barrier ());
27510 /* Emit the load-exclusive and store-exclusive instructions.
27511 Use acquire and release versions if necessary. */
27513 static void
27514 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27516 rtx (*gen) (rtx, rtx);
27518 if (acq)
27520 switch (mode)
27522 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27523 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27524 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27525 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27526 default:
27527 gcc_unreachable ();
27530 else
27532 switch (mode)
27534 case QImode: gen = gen_arm_load_exclusiveqi; break;
27535 case HImode: gen = gen_arm_load_exclusivehi; break;
27536 case SImode: gen = gen_arm_load_exclusivesi; break;
27537 case DImode: gen = gen_arm_load_exclusivedi; break;
27538 default:
27539 gcc_unreachable ();
27543 emit_insn (gen (rval, mem));
27546 static void
27547 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27548 rtx mem, bool rel)
27550 rtx (*gen) (rtx, rtx, rtx);
27552 if (rel)
27554 switch (mode)
27556 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27557 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27558 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27559 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27560 default:
27561 gcc_unreachable ();
27564 else
27566 switch (mode)
27568 case QImode: gen = gen_arm_store_exclusiveqi; break;
27569 case HImode: gen = gen_arm_store_exclusivehi; break;
27570 case SImode: gen = gen_arm_store_exclusivesi; break;
27571 case DImode: gen = gen_arm_store_exclusivedi; break;
27572 default:
27573 gcc_unreachable ();
27577 emit_insn (gen (bval, rval, mem));
27580 /* Mark the previous jump instruction as unlikely. */
27582 static void
27583 emit_unlikely_jump (rtx insn)
27585 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27587 insn = emit_jump_insn (insn);
27588 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27591 /* Expand a compare and swap pattern. */
27593 void
27594 arm_expand_compare_and_swap (rtx operands[])
27596 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27597 machine_mode mode;
27598 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27600 bval = operands[0];
27601 rval = operands[1];
27602 mem = operands[2];
27603 oldval = operands[3];
27604 newval = operands[4];
27605 is_weak = operands[5];
27606 mod_s = operands[6];
27607 mod_f = operands[7];
27608 mode = GET_MODE (mem);
27610 /* Normally the succ memory model must be stronger than fail, but in the
27611 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27612 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27614 if (TARGET_HAVE_LDACQ
27615 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27616 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27617 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27619 switch (mode)
27621 case QImode:
27622 case HImode:
27623 /* For narrow modes, we're going to perform the comparison in SImode,
27624 so do the zero-extension now. */
27625 rval = gen_reg_rtx (SImode);
27626 oldval = convert_modes (SImode, mode, oldval, true);
27627 /* FALLTHRU */
27629 case SImode:
27630 /* Force the value into a register if needed. We waited until after
27631 the zero-extension above to do this properly. */
27632 if (!arm_add_operand (oldval, SImode))
27633 oldval = force_reg (SImode, oldval);
27634 break;
27636 case DImode:
27637 if (!cmpdi_operand (oldval, mode))
27638 oldval = force_reg (mode, oldval);
27639 break;
27641 default:
27642 gcc_unreachable ();
27645 switch (mode)
27647 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27648 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27649 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27650 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27651 default:
27652 gcc_unreachable ();
27655 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27657 if (mode == QImode || mode == HImode)
27658 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27660 /* In all cases, we arrange for success to be signaled by Z set.
27661 This arrangement allows for the boolean result to be used directly
27662 in a subsequent branch, post optimization. */
27663 x = gen_rtx_REG (CCmode, CC_REGNUM);
27664 x = gen_rtx_EQ (SImode, x, const0_rtx);
27665 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27668 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27669 another memory store between the load-exclusive and store-exclusive can
27670 reset the monitor from Exclusive to Open state. This means we must wait
27671 until after reload to split the pattern, lest we get a register spill in
27672 the middle of the atomic sequence. */
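/* For a strong SImode compare-and-swap on a target without LDAEX/STLEX,
   the split below emits roughly this sequence (register names are purely
   illustrative):

       1:  ldrex   rval, [mem]
           cmp     rval, oldval
           bne     2f
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     1b
       2:

   plus the surrounding memory barriers when the memory model requires
   them.  */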
27674 void
27675 arm_split_compare_and_swap (rtx operands[])
27677 rtx rval, mem, oldval, newval, scratch;
27678 machine_mode mode;
27679 enum memmodel mod_s, mod_f;
27680 bool is_weak;
27681 rtx_code_label *label1, *label2;
27682 rtx x, cond;
27684 rval = operands[0];
27685 mem = operands[1];
27686 oldval = operands[2];
27687 newval = operands[3];
27688 is_weak = (operands[4] != const0_rtx);
27689 mod_s = (enum memmodel) INTVAL (operands[5]);
27690 mod_f = (enum memmodel) INTVAL (operands[6]);
27691 scratch = operands[7];
27692 mode = GET_MODE (mem);
27694 bool use_acquire = TARGET_HAVE_LDACQ
27695 && !(mod_s == MEMMODEL_RELAXED
27696 || mod_s == MEMMODEL_CONSUME
27697 || mod_s == MEMMODEL_RELEASE);
27699 bool use_release = TARGET_HAVE_LDACQ
27700 && !(mod_s == MEMMODEL_RELAXED
27701 || mod_s == MEMMODEL_CONSUME
27702 || mod_s == MEMMODEL_ACQUIRE);
27704 /* Checks whether a barrier is needed and emits one accordingly. */
27705 if (!(use_acquire || use_release))
27706 arm_pre_atomic_barrier (mod_s);
27708 label1 = NULL;
27709 if (!is_weak)
27711 label1 = gen_label_rtx ();
27712 emit_label (label1);
27714 label2 = gen_label_rtx ();
27716 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27718 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27719 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27720 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27721 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27722 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27724 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27726 /* Weak or strong, we want EQ to be true for success, so that we
27727 match the flags that we got from the compare above. */
27728 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27729 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27730 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27732 if (!is_weak)
27734 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27735 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27736 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27737 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27740 if (mod_f != MEMMODEL_RELAXED)
27741 emit_label (label2);
27743 /* Checks whether a barrier is needed and emits one accordingly. */
27744 if (!(use_acquire || use_release))
27745 arm_post_atomic_barrier (mod_s);
27747 if (mod_f == MEMMODEL_RELAXED)
27748 emit_label (label2);
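/* Split one atomic read-modify-write operation into a load-exclusive /
   operate / store-exclusive retry loop.  CODE is the operation applied to
   MEM with operand VALUE; OLD_OUT and NEW_OUT (either may be NULL) receive
   the values before and after the operation; MODEL_RTX is the memory model
   and COND is a scratch register that holds the STREX success result.  */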
27751 void
27752 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27753 rtx value, rtx model_rtx, rtx cond)
27755 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27756 machine_mode mode = GET_MODE (mem);
27757 machine_mode wmode = (mode == DImode ? DImode : SImode);
27758 rtx_code_label *label;
27759 rtx x;
27761 bool use_acquire = TARGET_HAVE_LDACQ
27762 && !(model == MEMMODEL_RELAXED
27763 || model == MEMMODEL_CONSUME
27764 || model == MEMMODEL_RELEASE);
27766 bool use_release = TARGET_HAVE_LDACQ
27767 && !(model == MEMMODEL_RELAXED
27768 || model == MEMMODEL_CONSUME
27769 || model == MEMMODEL_ACQUIRE);
27771 /* Checks whether a barrier is needed and emits one accordingly. */
27772 if (!(use_acquire || use_release))
27773 arm_pre_atomic_barrier (model);
27775 label = gen_label_rtx ();
27776 emit_label (label);
27778 if (new_out)
27779 new_out = gen_lowpart (wmode, new_out);
27780 if (old_out)
27781 old_out = gen_lowpart (wmode, old_out);
27782 else
27783 old_out = new_out;
27784 value = simplify_gen_subreg (wmode, value, mode, 0);
27786 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27788 switch (code)
27790 case SET:
27791 new_out = value;
27792 break;
27794 case NOT:
27795 x = gen_rtx_AND (wmode, old_out, value);
27796 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27797 x = gen_rtx_NOT (wmode, new_out);
27798 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27799 break;
27801 case MINUS:
27802 if (CONST_INT_P (value))
27804 value = GEN_INT (-INTVAL (value));
27805 code = PLUS;
27807 /* FALLTHRU */
27809 case PLUS:
27810 if (mode == DImode)
27812 /* DImode plus/minus need to clobber flags. */
27813 /* The adddi3 and subdi3 patterns are incorrectly written so that
27814 they require matching operands, even when we could easily support
27815 three operands. Thankfully, this can be fixed up post-splitting,
27816 as the individual add+adc patterns do accept three operands and
27817 post-reload cprop can make these moves go away. */
27818 emit_move_insn (new_out, old_out);
27819 if (code == PLUS)
27820 x = gen_adddi3 (new_out, new_out, value);
27821 else
27822 x = gen_subdi3 (new_out, new_out, value);
27823 emit_insn (x);
27824 break;
27826 /* FALLTHRU */
27828 default:
27829 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27830 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27831 break;
27834 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27835 use_release);
27837 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27838 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27840 /* Checks whether a barrier is needed and emits one accordingly. */
27841 if (!(use_acquire || use_release))
27842 arm_post_atomic_barrier (model);
27845 #define MAX_VECT_LEN 16
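/* Description of a constant vector permutation being expanded: the target
   and source operands, the selector PERM (one index per element), the
   vector mode and element count, whether both inputs are the same vector,
   and whether we are only testing for validity rather than emitting code.  */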
27847 struct expand_vec_perm_d
27849 rtx target, op0, op1;
27850 unsigned char perm[MAX_VECT_LEN];
27851 machine_mode vmode;
27852 unsigned char nelt;
27853 bool one_vector_p;
27854 bool testing_p;
27857 /* Generate a variable permutation. */
27859 static void
27860 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27862 machine_mode vmode = GET_MODE (target);
27863 bool one_vector_p = rtx_equal_p (op0, op1);
27865 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27866 gcc_checking_assert (GET_MODE (op0) == vmode);
27867 gcc_checking_assert (GET_MODE (op1) == vmode);
27868 gcc_checking_assert (GET_MODE (sel) == vmode);
27869 gcc_checking_assert (TARGET_NEON);
27871 if (one_vector_p)
27873 if (vmode == V8QImode)
27874 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27875 else
27876 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27878 else
27880 rtx pair;
27882 if (vmode == V8QImode)
27884 pair = gen_reg_rtx (V16QImode);
27885 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27886 pair = gen_lowpart (TImode, pair);
27887 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27889 else
27891 pair = gen_reg_rtx (OImode);
27892 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27893 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27898 void
27899 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27901 machine_mode vmode = GET_MODE (target);
27902 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27903 bool one_vector_p = rtx_equal_p (op0, op1);
27904 rtx rmask[MAX_VECT_LEN], mask;
27906 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27907 numbering of elements for big-endian, we must reverse the order. */
27908 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27910 /* The VTBL instruction does not use a modulo index, so we must take care
27911 of that ourselves. */
27912 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27913 for (i = 0; i < nelt; ++i)
27914 rmask[i] = mask;
27915 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27916 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27918 arm_expand_vec_perm_1 (target, op0, op1, sel);
27921 /* Generate or test for an insn that supports a constant permutation. */
27923 /* Recognize patterns for the VUZP insns. */
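/* For example, on V4SI a two-operand selector of {0, 2, 4, 6} picks the
   even elements of the concatenated inputs and {1, 3, 5, 7} the odd ones;
   each corresponds to one output of a VUZP.32.  */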
27925 static bool
27926 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27928 unsigned int i, odd, mask, nelt = d->nelt;
27929 rtx out0, out1, in0, in1, x;
27930 rtx (*gen)(rtx, rtx, rtx, rtx);
27932 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27933 return false;
27935 /* Note that these are little-endian tests. Adjust for big-endian later. */
27936 if (d->perm[0] == 0)
27937 odd = 0;
27938 else if (d->perm[0] == 1)
27939 odd = 1;
27940 else
27941 return false;
27942 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27944 for (i = 0; i < nelt; i++)
27946 unsigned elt = (i * 2 + odd) & mask;
27947 if (d->perm[i] != elt)
27948 return false;
27951 /* Success! */
27952 if (d->testing_p)
27953 return true;
27955 switch (d->vmode)
27957 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27958 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27959 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27960 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27961 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27962 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27963 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27964 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27965 default:
27966 gcc_unreachable ();
27969 in0 = d->op0;
27970 in1 = d->op1;
27971 if (BYTES_BIG_ENDIAN)
27973 x = in0, in0 = in1, in1 = x;
27974 odd = !odd;
27977 out0 = d->target;
27978 out1 = gen_reg_rtx (d->vmode);
27979 if (odd)
27980 x = out0, out0 = out1, out1 = x;
27982 emit_insn (gen (out0, in0, in1, out1));
27983 return true;
27986 /* Recognize patterns for the VZIP insns. */
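/* For example, on V4SI a two-operand selector of {0, 4, 1, 5} interleaves
   the low halves of the two inputs and {2, 6, 3, 7} the high halves; each
   corresponds to one output of a VZIP.32.  */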
27988 static bool
27989 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27991 unsigned int i, high, mask, nelt = d->nelt;
27992 rtx out0, out1, in0, in1, x;
27993 rtx (*gen)(rtx, rtx, rtx, rtx);
27995 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27996 return false;
27998 /* Note that these are little-endian tests. Adjust for big-endian later. */
27999 high = nelt / 2;
28000 if (d->perm[0] == high)
28002 else if (d->perm[0] == 0)
28003 high = 0;
28004 else
28005 return false;
28006 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28008 for (i = 0; i < nelt / 2; i++)
28010 unsigned elt = (i + high) & mask;
28011 if (d->perm[i * 2] != elt)
28012 return false;
28013 elt = (elt + nelt) & mask;
28014 if (d->perm[i * 2 + 1] != elt)
28015 return false;
28018 /* Success! */
28019 if (d->testing_p)
28020 return true;
28022 switch (d->vmode)
28024 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28025 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28026 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28027 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28028 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28029 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28030 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28031 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28032 default:
28033 gcc_unreachable ();
28036 in0 = d->op0;
28037 in1 = d->op1;
28038 if (BYTES_BIG_ENDIAN)
28040 x = in0, in0 = in1, in1 = x;
28041 high = !high;
28044 out0 = d->target;
28045 out1 = gen_reg_rtx (d->vmode);
28046 if (high)
28047 x = out0, out0 = out1, out1 = x;
28049 emit_insn (gen (out0, in0, in1, out1));
28050 return true;
28053 /* Recognize patterns for the VREV insns. */
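/* For example, on V8QI a selector of {3, 2, 1, 0, 7, 6, 5, 4} (diff == 3)
   reverses the bytes within each 32-bit group and maps onto VREV32.8.  */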
28055 static bool
28056 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28058 unsigned int i, j, diff, nelt = d->nelt;
28059 rtx (*gen)(rtx, rtx);
28061 if (!d->one_vector_p)
28062 return false;
28064 diff = d->perm[0];
28065 switch (diff)
28067 case 7:
28068 switch (d->vmode)
28070 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28071 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28072 default:
28073 return false;
28075 break;
28076 case 3:
28077 switch (d->vmode)
28079 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28080 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28081 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28082 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28083 default:
28084 return false;
28086 break;
28087 case 1:
28088 switch (d->vmode)
28090 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28091 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28092 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28093 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28094 case V4SImode: gen = gen_neon_vrev64v4si; break;
28095 case V2SImode: gen = gen_neon_vrev64v2si; break;
28096 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28097 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28098 default:
28099 return false;
28101 break;
28102 default:
28103 return false;
28106 for (i = 0; i < nelt ; i += diff + 1)
28107 for (j = 0; j <= diff; j += 1)
28109 /* This is guaranteed to be true as the value of diff
28110 is 7, 3, 1 and we should have enough elements in the
28111 queue to generate this. Getting a vector mask with a
28112 value of diff other than these values implies that
28113 something is wrong by the time we get here. */
28114 gcc_assert (i + j < nelt);
28115 if (d->perm[i + j] != i + diff - j)
28116 return false;
28119 /* Success! */
28120 if (d->testing_p)
28121 return true;
28123 emit_insn (gen (d->target, d->op0));
28124 return true;
28127 /* Recognize patterns for the VTRN insns. */
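/* For example, on V4SI a two-operand selector of {0, 4, 2, 6} (or
   {1, 5, 3, 7}) picks alternating elements from the two inputs; each
   corresponds to one output of a VTRN.32.  */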
28129 static bool
28130 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28132 unsigned int i, odd, mask, nelt = d->nelt;
28133 rtx out0, out1, in0, in1, x;
28134 rtx (*gen)(rtx, rtx, rtx, rtx);
28136 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28137 return false;
28139 /* Note that these are little-endian tests. Adjust for big-endian later. */
28140 if (d->perm[0] == 0)
28141 odd = 0;
28142 else if (d->perm[0] == 1)
28143 odd = 1;
28144 else
28145 return false;
28146 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28148 for (i = 0; i < nelt; i += 2)
28150 if (d->perm[i] != i + odd)
28151 return false;
28152 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28153 return false;
28156 /* Success! */
28157 if (d->testing_p)
28158 return true;
28160 switch (d->vmode)
28162 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28163 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28164 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28165 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28166 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28167 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28168 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28169 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28170 default:
28171 gcc_unreachable ();
28174 in0 = d->op0;
28175 in1 = d->op1;
28176 if (BYTES_BIG_ENDIAN)
28178 x = in0, in0 = in1, in1 = x;
28179 odd = !odd;
28182 out0 = d->target;
28183 out1 = gen_reg_rtx (d->vmode);
28184 if (odd)
28185 x = out0, out0 = out1, out1 = x;
28187 emit_insn (gen (out0, in0, in1, out1));
28188 return true;
28191 /* Recognize patterns for the VEXT insns. */
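/* For example, on V4SI a selector of {1, 2, 3, 4} takes a window that
   starts one element into the first input and runs on into the second,
   which is exactly VEXT.32 with an immediate of 1.  */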
28193 static bool
28194 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28196 unsigned int i, nelt = d->nelt;
28197 rtx (*gen) (rtx, rtx, rtx, rtx);
28198 rtx offset;
28200 unsigned int location;
28202 unsigned int next = d->perm[0] + 1;
28204 /* TODO: Handle GCC's numbering of elements for big-endian. */
28205 if (BYTES_BIG_ENDIAN)
28206 return false;
28208 /* Check if the extracted indexes are increasing by one. */
28209 for (i = 1; i < nelt; next++, i++)
28211 /* If we hit the most significant element of the 2nd vector in
28212 the previous iteration, no need to test further. */
28213 if (next == 2 * nelt)
28214 return false;
28216 /* If we are operating on only one vector: it could be a
28217 rotation. If there are only two elements of size < 64, let
28218 arm_evpc_neon_vrev catch it. */
28219 if (d->one_vector_p && (next == nelt))
28221 if ((nelt == 2) && (d->vmode != V2DImode))
28222 return false;
28223 else
28224 next = 0;
28227 if (d->perm[i] != next)
28228 return false;
28231 location = d->perm[0];
28233 switch (d->vmode)
28235 case V16QImode: gen = gen_neon_vextv16qi; break;
28236 case V8QImode: gen = gen_neon_vextv8qi; break;
28237 case V4HImode: gen = gen_neon_vextv4hi; break;
28238 case V8HImode: gen = gen_neon_vextv8hi; break;
28239 case V2SImode: gen = gen_neon_vextv2si; break;
28240 case V4SImode: gen = gen_neon_vextv4si; break;
28241 case V2SFmode: gen = gen_neon_vextv2sf; break;
28242 case V4SFmode: gen = gen_neon_vextv4sf; break;
28243 case V2DImode: gen = gen_neon_vextv2di; break;
28244 default:
28245 return false;
28248 /* Success! */
28249 if (d->testing_p)
28250 return true;
28252 offset = GEN_INT (location);
28253 emit_insn (gen (d->target, d->op0, d->op1, offset));
28254 return true;
28257 /* The NEON VTBL instruction is a fully variable permutation that's even
28258 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28259 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28260 can do slightly better by expanding this as a constant where we don't
28261 have to apply a mask. */
28263 static bool
28264 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28266 rtx rperm[MAX_VECT_LEN], sel;
28267 machine_mode vmode = d->vmode;
28268 unsigned int i, nelt = d->nelt;
28270 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28271 numbering of elements for big-endian, we must reverse the order. */
28272 if (BYTES_BIG_ENDIAN)
28273 return false;
28275 if (d->testing_p)
28276 return true;
28278 /* Generic code will try constant permutation twice. Once with the
28279 original mode and again with the elements lowered to QImode.
28280 So wait and don't do the selector expansion ourselves. */
28281 if (vmode != V8QImode && vmode != V16QImode)
28282 return false;
28284 for (i = 0; i < nelt; ++i)
28285 rperm[i] = GEN_INT (d->perm[i]);
28286 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28287 sel = force_reg (vmode, sel);
28289 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28290 return true;
28293 static bool
28294 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28296 /* Check if the input mask matches vext before reordering the
28297 operands. */
28298 if (TARGET_NEON)
28299 if (arm_evpc_neon_vext (d))
28300 return true;
28302 /* The pattern matching functions above are written to look for a small
28303 number to begin the sequence (0, 1, N/2). If we begin with an index
28304 from the second operand, we can swap the operands. */
28305 if (d->perm[0] >= d->nelt)
28307 unsigned i, nelt = d->nelt;
28308 rtx x;
28310 for (i = 0; i < nelt; ++i)
28311 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28313 x = d->op0;
28314 d->op0 = d->op1;
28315 d->op1 = x;
28318 if (TARGET_NEON)
28320 if (arm_evpc_neon_vuzp (d))
28321 return true;
28322 if (arm_evpc_neon_vzip (d))
28323 return true;
28324 if (arm_evpc_neon_vrev (d))
28325 return true;
28326 if (arm_evpc_neon_vtrn (d))
28327 return true;
28328 return arm_evpc_neon_vtbl (d);
28330 return false;
28333 /* Expand a vec_perm_const pattern. */
28335 bool
28336 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28338 struct expand_vec_perm_d d;
28339 int i, nelt, which;
28341 d.target = target;
28342 d.op0 = op0;
28343 d.op1 = op1;
28345 d.vmode = GET_MODE (target);
28346 gcc_assert (VECTOR_MODE_P (d.vmode));
28347 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28348 d.testing_p = false;
28350 for (i = which = 0; i < nelt; ++i)
28352 rtx e = XVECEXP (sel, 0, i);
28353 int ei = INTVAL (e) & (2 * nelt - 1);
28354 which |= (ei < nelt ? 1 : 2);
28355 d.perm[i] = ei;
28358 switch (which)
28360 default:
28361 gcc_unreachable();
28363 case 3:
28364 d.one_vector_p = false;
28365 if (!rtx_equal_p (op0, op1))
28366 break;
28368 /* The elements of PERM do not suggest that only the first operand
28369 is used, but both operands are identical. Allow easier matching
28370 of the permutation by folding the permutation into the single
28371 input vector. */
28372 /* FALLTHRU */
28373 case 2:
28374 for (i = 0; i < nelt; ++i)
28375 d.perm[i] &= nelt - 1;
28376 d.op0 = op1;
28377 d.one_vector_p = true;
28378 break;
28380 case 1:
28381 d.op1 = op0;
28382 d.one_vector_p = true;
28383 break;
28386 return arm_expand_vec_perm_const_1 (&d);
28389 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28391 static bool
28392 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28393 const unsigned char *sel)
28395 struct expand_vec_perm_d d;
28396 unsigned int i, nelt, which;
28397 bool ret;
28399 d.vmode = vmode;
28400 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28401 d.testing_p = true;
28402 memcpy (d.perm, sel, nelt);
28404 /* Categorize the set of elements in the selector. */
28405 for (i = which = 0; i < nelt; ++i)
28407 unsigned char e = d.perm[i];
28408 gcc_assert (e < 2 * nelt);
28409 which |= (e < nelt ? 1 : 2);
28412 /* For all elements from second vector, fold the elements to first. */
28413 if (which == 2)
28414 for (i = 0; i < nelt; ++i)
28415 d.perm[i] -= nelt;
28417 /* Check whether the mask can be applied to the vector type. */
28418 d.one_vector_p = (which != 3);
28420 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28421 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28422 if (!d.one_vector_p)
28423 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28425 start_sequence ();
28426 ret = arm_expand_vec_perm_const_1 (&d);
28427 end_sequence ();
28429 return ret;
28432 bool
28433 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28435 /* If we are soft float and either have ldrd or the access fits in a
28436 single word, then all auto increment forms are ok. */
28437 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28438 return true;
28440 switch (code)
28442 /* Post increment and Pre Decrement are supported for all
28443 instruction forms except for vector forms. */
28444 case ARM_POST_INC:
28445 case ARM_PRE_DEC:
28446 if (VECTOR_MODE_P (mode))
28448 if (code != ARM_PRE_DEC)
28449 return true;
28450 else
28451 return false;
28454 return true;
28456 case ARM_POST_DEC:
28457 case ARM_PRE_INC:
28458 /* Without LDRD and mode size greater than
28459 word size, there is no point in auto-incrementing
28460 because ldm and stm will not have these forms. */
28461 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28462 return false;
28464 /* Vector and floating point modes do not support
28465 these auto increment forms. */
28466 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28467 return false;
28469 return true;
28471 default:
28472 return false;
28476 return false;
28479 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28480 on ARM, since we know that shifts by negative amounts are no-ops.
28481 Additionally, the default expansion code is not available or suitable
28482 for post-reload insn splits (this can occur when the register allocator
28483 chooses not to do a shift in NEON).
28485 This function is used in both initial expand and post-reload splits, and
28486 handles all kinds of 64-bit shifts.
28488 Input requirements:
28489 - It is safe for the input and output to be the same register, but
28490 early-clobber rules apply for the shift amount and scratch registers.
28491 - Shift by register requires both scratch registers. In all other cases
28492 the scratch registers may be NULL.
28493 - Ashiftrt by a register also clobbers the CC register. */
28494 void
28495 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28496 rtx amount, rtx scratch1, rtx scratch2)
28498 rtx out_high = gen_highpart (SImode, out);
28499 rtx out_low = gen_lowpart (SImode, out);
28500 rtx in_high = gen_highpart (SImode, in);
28501 rtx in_low = gen_lowpart (SImode, in);
28503 /* Terminology:
28504 in = the register pair containing the input value.
28505 out = the destination register pair.
28506 up = the high- or low-part of each pair.
28507 down = the opposite part to "up".
28508 In a shift, we can consider bits to shift from "up"-stream to
28509 "down"-stream, so in a left-shift "up" is the low-part and "down"
28510 is the high-part of each register pair. */
28512 rtx out_up = code == ASHIFT ? out_low : out_high;
28513 rtx out_down = code == ASHIFT ? out_high : out_low;
28514 rtx in_up = code == ASHIFT ? in_low : in_high;
28515 rtx in_down = code == ASHIFT ? in_high : in_low;
28517 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28518 gcc_assert (out
28519 && (REG_P (out) || GET_CODE (out) == SUBREG)
28520 && GET_MODE (out) == DImode);
28521 gcc_assert (in
28522 && (REG_P (in) || GET_CODE (in) == SUBREG)
28523 && GET_MODE (in) == DImode);
28524 gcc_assert (amount
28525 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28526 && GET_MODE (amount) == SImode)
28527 || CONST_INT_P (amount)));
28528 gcc_assert (scratch1 == NULL
28529 || (GET_CODE (scratch1) == SCRATCH)
28530 || (GET_MODE (scratch1) == SImode
28531 && REG_P (scratch1)));
28532 gcc_assert (scratch2 == NULL
28533 || (GET_CODE (scratch2) == SCRATCH)
28534 || (GET_MODE (scratch2) == SImode
28535 && REG_P (scratch2)));
28536 gcc_assert (!REG_P (out) || !REG_P (amount)
28537 || !HARD_REGISTER_P (out)
28538 || (REGNO (out) != REGNO (amount)
28539 && REGNO (out) + 1 != REGNO (amount)));
28541 /* Macros to make following code more readable. */
28542 #define SUB_32(DEST,SRC) \
28543 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28544 #define RSB_32(DEST,SRC) \
28545 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28546 #define SUB_S_32(DEST,SRC) \
28547 gen_addsi3_compare0 ((DEST), (SRC), \
28548 GEN_INT (-32))
28549 #define SET(DEST,SRC) \
28550 gen_rtx_SET (SImode, (DEST), (SRC))
28551 #define SHIFT(CODE,SRC,AMOUNT) \
28552 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28553 #define LSHIFT(CODE,SRC,AMOUNT) \
28554 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28555 SImode, (SRC), (AMOUNT))
28556 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28557 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28558 SImode, (SRC), (AMOUNT))
28559 #define ORR(A,B) \
28560 gen_rtx_IOR (SImode, (A), (B))
28561 #define BRANCH(COND,LABEL) \
28562 gen_arm_cond_branch ((LABEL), \
28563 gen_rtx_ ## COND (CCmode, cc_reg, \
28564 const0_rtx), \
28565 cc_reg)
28567 /* Shifts by register and shifts by constant are handled separately. */
28568 if (CONST_INT_P (amount))
28570 /* We have a shift-by-constant. */
28572 /* First, handle out-of-range shift amounts.
28573 In both cases we try to match the result that an ARM instruction in a
28574 shift-by-register would give. This helps reduce execution
28575 differences between optimization levels, but it won't stop other
28576 parts of the compiler doing different things. This is "undefined
28577 behaviour", in any case. */
28578 if (INTVAL (amount) <= 0)
28579 emit_insn (gen_movdi (out, in));
28580 else if (INTVAL (amount) >= 64)
28582 if (code == ASHIFTRT)
28584 rtx const31_rtx = GEN_INT (31);
28585 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28586 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28588 else
28589 emit_insn (gen_movdi (out, const0_rtx));
28592 /* Now handle valid shifts. */
28593 else if (INTVAL (amount) < 32)
28595 /* Shifts by a constant less than 32. */
28596 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28598 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28599 emit_insn (SET (out_down,
28600 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28601 out_down)));
28602 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28604 else
28606 /* Shifts by a constant greater than 31. */
28607 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28609 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28610 if (code == ASHIFTRT)
28611 emit_insn (gen_ashrsi3 (out_up, in_up,
28612 GEN_INT (31)));
28613 else
28614 emit_insn (SET (out_up, const0_rtx));
28617 else
28619 /* We have a shift-by-register. */
28620 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28622 /* This alternative requires the scratch registers. */
28623 gcc_assert (scratch1 && REG_P (scratch1));
28624 gcc_assert (scratch2 && REG_P (scratch2));
28626 /* We will need the values "amount-32" and "32-amount" later.
28627 Swapping them around now allows the later code to be more general. */
28628 switch (code)
28630 case ASHIFT:
28631 emit_insn (SUB_32 (scratch1, amount));
28632 emit_insn (RSB_32 (scratch2, amount));
28633 break;
28634 case ASHIFTRT:
28635 emit_insn (RSB_32 (scratch1, amount));
28636 /* Also set CC = amount > 32. */
28637 emit_insn (SUB_S_32 (scratch2, amount));
28638 break;
28639 case LSHIFTRT:
28640 emit_insn (RSB_32 (scratch1, amount));
28641 emit_insn (SUB_32 (scratch2, amount));
28642 break;
28643 default:
28644 gcc_unreachable ();
28647 /* Emit code like this:
28649 arithmetic-left:
28650 out_down = in_down << amount;
28651 out_down = (in_up << (amount - 32)) | out_down;
28652 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28653 out_up = in_up << amount;
28655 arithmetic-right:
28656 out_down = in_down >> amount;
28657 out_down = (in_up << (32 - amount)) | out_down;
28658 if (amount < 32)
28659 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28660 out_up = in_up << amount;
28662 logical-right:
28663 out_down = in_down >> amount;
28664 out_down = (in_up << (32 - amount)) | out_down;
28665 if (amount < 32)
28666 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28667 out_up = in_up << amount;
28669 The ARM and Thumb2 variants are the same but implemented slightly
28670 differently. If this were only called during expand we could just
28671 use the Thumb2 case and let combine do the right thing, but this
28672 can also be called from post-reload splitters. */
28674 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28676 if (!TARGET_THUMB2)
28678 /* Emit code for ARM mode. */
28679 emit_insn (SET (out_down,
28680 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28681 if (code == ASHIFTRT)
28683 rtx_code_label *done_label = gen_label_rtx ();
28684 emit_jump_insn (BRANCH (LT, done_label));
28685 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28686 out_down)));
28687 emit_label (done_label);
28689 else
28690 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28691 out_down)));
28693 else
28695 /* Emit code for Thumb2 mode.
28696 Thumb2 can't do shift and or in one insn. */
28697 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28698 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28700 if (code == ASHIFTRT)
28702 rtx_code_label *done_label = gen_label_rtx ();
28703 emit_jump_insn (BRANCH (LT, done_label));
28704 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28705 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28706 emit_label (done_label);
28708 else
28710 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28711 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28715 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28718 #undef SUB_32
28719 #undef RSB_32
28720 #undef SUB_S_32
28721 #undef SET
28722 #undef SHIFT
28723 #undef LSHIFT
28724 #undef REV_LSHIFT
28725 #undef ORR
28726 #undef BRANCH
28730 /* Return true if *COMPARISON is a comparison operation we can handle,
28731 putting the operands into a form that is valid for it. */
28732 bool
28733 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28735 enum rtx_code code = GET_CODE (*comparison);
28736 int code_int;
28737 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28738 ? GET_MODE (*op2) : GET_MODE (*op1);
28740 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28742 if (code == UNEQ || code == LTGT)
28743 return false;
28745 code_int = (int)code;
28746 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28747 PUT_CODE (*comparison, (enum rtx_code)code_int);
28749 switch (mode)
28751 case SImode:
28752 if (!arm_add_operand (*op1, mode))
28753 *op1 = force_reg (mode, *op1);
28754 if (!arm_add_operand (*op2, mode))
28755 *op2 = force_reg (mode, *op2);
28756 return true;
28758 case DImode:
28759 if (!cmpdi_operand (*op1, mode))
28760 *op1 = force_reg (mode, *op1);
28761 if (!cmpdi_operand (*op2, mode))
28762 *op2 = force_reg (mode, *op2);
28763 return true;
28765 case SFmode:
28766 case DFmode:
28767 if (!arm_float_compare_operand (*op1, mode))
28768 *op1 = force_reg (mode, *op1);
28769 if (!arm_float_compare_operand (*op2, mode))
28770 *op2 = force_reg (mode, *op2);
28771 return true;
28772 default:
28773 break;
28776 return false;
28780 /* Maximum number of instructions to set block of memory. */
28781 static int
28782 arm_block_set_max_insns (void)
28784 if (optimize_function_for_size_p (cfun))
28785 return 4;
28786 else
28787 return current_tune->max_insns_inline_memset;
28790 /* Return TRUE if it's profitable to set block of memory for
28791 non-vectorized case. VAL is the value to set the memory
28792 with. LENGTH is the number of bytes to set. ALIGN is the
28793 alignment of the destination memory in bytes. UNALIGNED_P
28794 is TRUE if we can only set the memory with instructions
28795 meeting alignment requirements. USE_STRD_P is TRUE if we
28796 can use strd to set the memory. */
28797 static bool
28798 arm_block_set_non_vect_profit_p (rtx val,
28799 unsigned HOST_WIDE_INT length,
28800 unsigned HOST_WIDE_INT align,
28801 bool unaligned_p, bool use_strd_p)
28803 int num = 0;
28804 /* For leftovers in bytes of 0-7, we can set the memory block using
28805 strb/strh/str with minimum instruction number. */
28806 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
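/* Example: a word-aligned memset of 11 bytes on the non-strd path needs
   (11 >> 2) == 2 word stores plus leftover[11 & 3] == 2 further stores on
   top of the constant load, and the trailing STRH/STRB pair can merge into
   a single STR when unaligned access is available (see below).  */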
28808 if (unaligned_p)
28810 num = arm_const_inline_cost (SET, val);
28811 num += length / align + length % align;
28813 else if (use_strd_p)
28815 num = arm_const_double_inline_cost (val);
28816 num += (length >> 3) + leftover[length & 7];
28818 else
28820 num = arm_const_inline_cost (SET, val);
28821 num += (length >> 2) + leftover[length & 3];
28824 /* We may be able to combine last pair STRH/STRB into a single STR
28825 by shifting one byte back. */
28826 if (unaligned_access && length > 3 && (length & 3) == 3)
28827 num--;
28829 return (num <= arm_block_set_max_insns ());
28832 /* Return TRUE if it's profitable to set block of memory for
28833 vectorized case. LENGTH is the number of bytes to set.
28834 ALIGN is the alignment of destination memory in bytes.
28835 MODE is the vector mode used to set the memory. */
28836 static bool
28837 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28838 unsigned HOST_WIDE_INT align,
28839 machine_mode mode)
28841 int num;
28842 bool unaligned_p = ((align & 3) != 0);
28843 unsigned int nelt = GET_MODE_NUNITS (mode);
28845 /* Instruction loading constant value. */
28846 num = 1;
28847 /* Instructions storing the memory. */
28848 num += (length + nelt - 1) / nelt;
28849 /* Instructions adjusting the address expression. Only need to
28850 adjust address expression if it's 4 bytes aligned and bytes
28851 leftover can only be stored by mis-aligned store instruction. */
28852 if (!unaligned_p && (length & 3) != 0)
28853 num++;
28855 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28856 if (!unaligned_p && mode == V16QImode)
28857 num--;
28859 return (num <= arm_block_set_max_insns ());
28862 /* Set a block of memory using vectorization instructions for the
28863 unaligned case. We fill the first LENGTH bytes of the memory
28864 area starting from DSTBASE with byte constant VALUE. ALIGN is
28865 the alignment requirement of memory. Return TRUE if succeeded. */
28866 static bool
28867 arm_block_set_unaligned_vect (rtx dstbase,
28868 unsigned HOST_WIDE_INT length,
28869 unsigned HOST_WIDE_INT value,
28870 unsigned HOST_WIDE_INT align)
28872 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28873 rtx dst, mem;
28874 rtx val_elt, val_vec, reg;
28875 rtx rval[MAX_VECT_LEN];
28876 rtx (*gen_func) (rtx, rtx);
28877 machine_mode mode;
28878 unsigned HOST_WIDE_INT v = value;
28880 gcc_assert ((align & 0x3) != 0);
28881 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28882 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28883 if (length >= nelt_v16)
28885 mode = V16QImode;
28886 gen_func = gen_movmisalignv16qi;
28888 else
28890 mode = V8QImode;
28891 gen_func = gen_movmisalignv8qi;
28893 nelt_mode = GET_MODE_NUNITS (mode);
28894 gcc_assert (length >= nelt_mode);
28895 /* Skip if it isn't profitable. */
28896 if (!arm_block_set_vect_profit_p (length, align, mode))
28897 return false;
28899 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28900 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28902 v = sext_hwi (v, BITS_PER_WORD);
28903 val_elt = GEN_INT (v);
28904 for (j = 0; j < nelt_mode; j++)
28905 rval[j] = val_elt;
28907 reg = gen_reg_rtx (mode);
28908 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28909 /* Emit instruction loading the constant value. */
28910 emit_move_insn (reg, val_vec);
28912 /* Handle nelt_mode bytes in a vector. */
28913 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28915 emit_insn ((*gen_func) (mem, reg));
28916 if (i + 2 * nelt_mode <= length)
28917 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28920 /* If there are not less than nelt_v8 bytes leftover, we must be in
28921 V16QI mode. */
28922 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28924 /* Handle (8, 16) bytes leftover. */
28925 if (i + nelt_v8 < length)
28927 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28928 /* We are shifting bytes back, set the alignment accordingly. */
28929 if ((length & 1) != 0 && align >= 2)
28930 set_mem_align (mem, BITS_PER_UNIT);
28932 emit_insn (gen_movmisalignv16qi (mem, reg));
28934 /* Handle (0, 8] bytes leftover. */
28935 else if (i < length && i + nelt_v8 >= length)
28937 if (mode == V16QImode)
28939 reg = gen_lowpart (V8QImode, reg);
28940 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28942 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28943 + (nelt_mode - nelt_v8))));
28944 /* We are shifting bytes back, set the alignment accordingly. */
28945 if ((length & 1) != 0 && align >= 2)
28946 set_mem_align (mem, BITS_PER_UNIT);
28948 emit_insn (gen_movmisalignv8qi (mem, reg));
28951 return true;
28954 /* Set a block of memory using vectorization instructions for the
28955 aligned case. We fill the first LENGTH bytes of the memory area
28956 starting from DSTBASE with byte constant VALUE. ALIGN is the
28957 alignment requirement of memory. Return TRUE if succeeded. */
28958 static bool
28959 arm_block_set_aligned_vect (rtx dstbase,
28960 unsigned HOST_WIDE_INT length,
28961 unsigned HOST_WIDE_INT value,
28962 unsigned HOST_WIDE_INT align)
28964 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28965 rtx dst, addr, mem;
28966 rtx val_elt, val_vec, reg;
28967 rtx rval[MAX_VECT_LEN];
28968 machine_mode mode;
28969 unsigned HOST_WIDE_INT v = value;
28971 gcc_assert ((align & 0x3) == 0);
28972 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28973 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28974 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28975 mode = V16QImode;
28976 else
28977 mode = V8QImode;
28979 nelt_mode = GET_MODE_NUNITS (mode);
28980 gcc_assert (length >= nelt_mode);
28981 /* Skip if it isn't profitable. */
28982 if (!arm_block_set_vect_profit_p (length, align, mode))
28983 return false;
28985 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28987 v = sext_hwi (v, BITS_PER_WORD);
28988 val_elt = GEN_INT (v);
28989 for (j = 0; j < nelt_mode; j++)
28990 rval[j] = val_elt;
28992 reg = gen_reg_rtx (mode);
28993 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28994 /* Emit instruction loading the constant value. */
28995 emit_move_insn (reg, val_vec);
28997 i = 0;
28998 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28999 if (mode == V16QImode)
29001 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29002 emit_insn (gen_movmisalignv16qi (mem, reg));
29003 i += nelt_mode;
29004 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29005 if (i + nelt_v8 < length && i + nelt_v16 > length)
29007 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29008 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29009 /* We are shifting bytes back, set the alignment accordingly. */
29010 if ((length & 0x3) == 0)
29011 set_mem_align (mem, BITS_PER_UNIT * 4);
29012 else if ((length & 0x1) == 0)
29013 set_mem_align (mem, BITS_PER_UNIT * 2);
29014 else
29015 set_mem_align (mem, BITS_PER_UNIT);
29017 emit_insn (gen_movmisalignv16qi (mem, reg));
29018 return true;
29020 /* Fall through for bytes leftover. */
29021 mode = V8QImode;
29022 nelt_mode = GET_MODE_NUNITS (mode);
29023 reg = gen_lowpart (V8QImode, reg);
29026 /* Handle 8 bytes in a vector. */
29027 for (; (i + nelt_mode <= length); i += nelt_mode)
29029 addr = plus_constant (Pmode, dst, i);
29030 mem = adjust_automodify_address (dstbase, mode, addr, i);
29031 emit_move_insn (mem, reg);
29034 /* Handle single word leftover by shifting 4 bytes back. We can
29035 use aligned access for this case. */
29036 if (i + UNITS_PER_WORD == length)
29038 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29039 mem = adjust_automodify_address (dstbase, mode,
29040 addr, i - UNITS_PER_WORD);
29041 /* We are shifting 4 bytes back, set the alignment accordingly. */
29042 if (align > UNITS_PER_WORD)
29043 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29045 emit_move_insn (mem, reg);
29047 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29048 We have to use unaligned access for this case. */
29049 else if (i < length)
29051 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29052 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29053 /* We are shifting bytes back, set the alignment accordingly. */
29054 if ((length & 1) == 0)
29055 set_mem_align (mem, BITS_PER_UNIT * 2);
29056 else
29057 set_mem_align (mem, BITS_PER_UNIT);
29059 emit_insn (gen_movmisalignv8qi (mem, reg));
29062 return true;
29065 /* Set a block of memory using plain strh/strb instructions, only
29066 using instructions allowed by ALIGN on processor. We fill the
29067 first LENGTH bytes of the memory area starting from DSTBASE
29068 with byte constant VALUE. ALIGN is the alignment requirement
29069 of memory. */
29070 static bool
29071 arm_block_set_unaligned_non_vect (rtx dstbase,
29072 unsigned HOST_WIDE_INT length,
29073 unsigned HOST_WIDE_INT value,
29074 unsigned HOST_WIDE_INT align)
29076 unsigned int i;
29077 rtx dst, addr, mem;
29078 rtx val_exp, val_reg, reg;
29079 machine_mode mode;
29080 HOST_WIDE_INT v = value;
29082 gcc_assert (align == 1 || align == 2);
29084 if (align == 2)
29085 v |= (value << BITS_PER_UNIT);
29087 v = sext_hwi (v, BITS_PER_WORD);
29088 val_exp = GEN_INT (v);
29089 /* Skip if it isn't profitable. */
29090 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29091 align, true, false))
29092 return false;
29094 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29095 mode = (align == 2 ? HImode : QImode);
29096 val_reg = force_reg (SImode, val_exp);
29097 reg = gen_lowpart (mode, val_reg);
29099 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29101 addr = plus_constant (Pmode, dst, i);
29102 mem = adjust_automodify_address (dstbase, mode, addr, i);
29103 emit_move_insn (mem, reg);
29106 /* Handle single byte leftover. */
29107 if (i + 1 == length)
29109 reg = gen_lowpart (QImode, val_reg);
29110 addr = plus_constant (Pmode, dst, i);
29111 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29112 emit_move_insn (mem, reg);
29113 i++;
29116 gcc_assert (i == length);
29117 return true;
29120 /* Set a block of memory using plain strd/str/strh/strb instructions,
29121 to permit unaligned copies on processors which support unaligned
29122 semantics for those instructions. We fill the first LENGTH bytes
29123 of the memory area starting from DSTBASE with byte constant VALUE.
29124 ALIGN is the alignment requirement of memory. */
29125 static bool
29126 arm_block_set_aligned_non_vect (rtx dstbase,
29127 unsigned HOST_WIDE_INT length,
29128 unsigned HOST_WIDE_INT value,
29129 unsigned HOST_WIDE_INT align)
29131 unsigned int i;
29132 rtx dst, addr, mem;
29133 rtx val_exp, val_reg, reg;
29134 unsigned HOST_WIDE_INT v;
29135 bool use_strd_p;
29137 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29138 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29140 v = (value | (value << 8) | (value << 16) | (value << 24));
29141 if (length < UNITS_PER_WORD)
29142 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29144 if (use_strd_p)
29145 v |= (v << BITS_PER_WORD);
29146 else
29147 v = sext_hwi (v, BITS_PER_WORD);
29149 val_exp = GEN_INT (v);
29150 /* Skip if it isn't profitable. */
29151 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29152 align, false, use_strd_p))
29154 if (!use_strd_p)
29155 return false;
29157 /* Try without strd. */
29158 v = (v >> BITS_PER_WORD);
29159 v = sext_hwi (v, BITS_PER_WORD);
29160 val_exp = GEN_INT (v);
29161 use_strd_p = false;
29162 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29163 align, false, use_strd_p))
29164 return false;
29167 i = 0;
29168 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29169 /* Handle double words using strd if possible. */
29170 if (use_strd_p)
29172 val_reg = force_reg (DImode, val_exp);
29173 reg = val_reg;
29174 for (; (i + 8 <= length); i += 8)
29176 addr = plus_constant (Pmode, dst, i);
29177 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29178 emit_move_insn (mem, reg);
29181 else
29182 val_reg = force_reg (SImode, val_exp);
29184 /* Handle words. */
29185 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29186 for (; (i + 4 <= length); i += 4)
29188 addr = plus_constant (Pmode, dst, i);
29189 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29190 if ((align & 3) == 0)
29191 emit_move_insn (mem, reg);
29192 else
29193 emit_insn (gen_unaligned_storesi (mem, reg));
29196 /* Merge last pair of STRH and STRB into a STR if possible. */
29197 if (unaligned_access && i > 0 && (i + 3) == length)
29199 addr = plus_constant (Pmode, dst, i - 1);
29200 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29201 /* We are shifting one byte back, set the alignment accordingly. */
29202 if ((align & 1) == 0)
29203 set_mem_align (mem, BITS_PER_UNIT);
29205 /* Most likely this is an unaligned access, and we can't tell at
29206 compilation time. */
29207 emit_insn (gen_unaligned_storesi (mem, reg));
29208 return true;
29211 /* Handle half word leftover. */
29212 if (i + 2 <= length)
29214 reg = gen_lowpart (HImode, val_reg);
29215 addr = plus_constant (Pmode, dst, i);
29216 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29217 if ((align & 1) == 0)
29218 emit_move_insn (mem, reg);
29219 else
29220 emit_insn (gen_unaligned_storehi (mem, reg));
29222 i += 2;
29225 /* Handle single byte leftover. */
29226 if (i + 1 == length)
29228 reg = gen_lowpart (QImode, val_reg);
29229 addr = plus_constant (Pmode, dst, i);
29230 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29231 emit_move_insn (mem, reg);
29234 return true;
29237 /* Set a block of memory using vectorization instructions for both
29238 aligned and unaligned cases. We fill the first LENGTH bytes of
29239 the memory area starting from DSTBASE with byte constant VALUE.
29240 ALIGN is the alignment requirement of memory. */
29241 static bool
29242 arm_block_set_vect (rtx dstbase,
29243 unsigned HOST_WIDE_INT length,
29244 unsigned HOST_WIDE_INT value,
29245 unsigned HOST_WIDE_INT align)
29246 {
29247 /* Check whether we need to use unaligned store instruction. */
29248 if (((align & 3) != 0 || (length & 3) != 0)
29249 /* Check whether unaligned store instruction is available. */
29250 && (!unaligned_access || BYTES_BIG_ENDIAN))
29251 return false;
29253 if ((align & 3) == 0)
29254 return arm_block_set_aligned_vect (dstbase, length, value, align);
29255 else
29256 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29257 }
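/* For instance (illustrative): a 16-byte block with ALIGN == 4 goes to
   arm_block_set_aligned_vect, a 15-byte block with ALIGN == 1 goes to
   arm_block_set_unaligned_vect when unaligned access is enabled, and
   that same 15-byte block is rejected here on a strict-alignment or
   big-endian target.  */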
29259 /* Expand a string store operation. First we try to do it using
29260 vectorization instructions, then fall back to ARM unaligned access and
29261 double-word stores if profitable. OPERANDS[0] is the destination,
29262 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
29263 initialize the memory, OPERANDS[3] is the known alignment of the
29264 destination. */
29265 bool
29266 arm_gen_setmem (rtx *operands)
29267 {
29268 rtx dstbase = operands[0];
29269 unsigned HOST_WIDE_INT length;
29270 unsigned HOST_WIDE_INT value;
29271 unsigned HOST_WIDE_INT align;
29273 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29274 return false;
29276 length = UINTVAL (operands[1]);
29277 if (length > 64)
29278 return false;
29280 value = (UINTVAL (operands[2]) & 0xFF);
29281 align = UINTVAL (operands[3]);
29282 if (TARGET_NEON && length >= 8
29283 && current_tune->string_ops_prefer_neon
29284 && arm_block_set_vect (dstbase, length, value, align))
29285 return true;
29287 if (!unaligned_access && (align & 3) != 0)
29288 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29290 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29291 }
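/* Usage sketch (the expander wiring is assumed, not shown in this file):
   a call such as memset (p, 0xAB, 24) normally reaches this function
   through the setmem pattern in the machine description with
   OPERANDS[1] == 24 and OPERANDS[2] == 0xAB; anything longer than 64
   bytes is rejected above and left to the generic expansion code.  */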
29294 static bool
29295 arm_macro_fusion_p (void)
29296 {
29297 return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
29298 }
29301 static bool
29302 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29303 {
29304 rtx set_dest;
29305 rtx prev_set = single_set (prev);
29306 rtx curr_set = single_set (curr);
29308 if (!prev_set
29309 || !curr_set)
29310 return false;
29312 if (any_condjump_p (curr))
29313 return false;
29315 if (!arm_macro_fusion_p ())
29316 return false;
29318 if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
29319 {
29320 /* We are trying to fuse
29321 movw imm / movt imm
29322 instructions as a group that gets scheduled together. */
29324 set_dest = SET_DEST (curr_set);
29326 if (GET_MODE (set_dest) != SImode)
29327 return false;
29329 /* We are trying to match:
29330 prev (movw) == (set (reg r0) (const_int imm16))
29331 curr (movt) == (set (zero_extract (reg r0)
29332 (const_int 16)
29333 (const_int 16))
29334 (const_int imm16_1))
29335 or
29336 prev (movw) == (set (reg r1)
29337 (high (symbol_ref ("SYM"))))
29338 curr (movt) == (set (reg r0)
29339 (lo_sum (reg r1)
29340 (symbol_ref ("SYM")))) */
29341 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29342 {
29343 if (CONST_INT_P (SET_SRC (curr_set))
29344 && CONST_INT_P (SET_SRC (prev_set))
29345 && REG_P (XEXP (set_dest, 0))
29346 && REG_P (SET_DEST (prev_set))
29347 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29348 return true;
29349 }
29350 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29351 && REG_P (SET_DEST (curr_set))
29352 && REG_P (SET_DEST (prev_set))
29353 && GET_CODE (SET_SRC (prev_set)) == HIGH
29354 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29355 return true;
29356 }
29357 return false;
29358 }
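/* Illustrative assembly for the fused pair ("sym" is just a placeholder
   symbol):
     movw r0, #:lower16:sym
     movt r0, #:upper16:sym
   Keeping the two instructions adjacent lets cores that implement
   movw/movt fusion treat them as a single 32-bit constant load.  */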
29360 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29362 static unsigned HOST_WIDE_INT
29363 arm_asan_shadow_offset (void)
29364 {
29365 return (unsigned HOST_WIDE_INT) 1 << 29;
29366 }
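/* The offset above is 0x20000000.  With the usual shadow scale of eight
   application bytes per shadow byte, instrumented code computes roughly
     shadow = (addr >> 3) + 0x20000000
   for each checked access; the exact scale comes from the generic ASan
   support, this is only an illustration.  */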
29369 /* This is a temporary fix for PR60655. Ideally we need
29370 to handle most of these cases in the generic part but
29371 currently we reject minus (..) (sym_ref). We try to
29372 ameliorate the case with minus (sym_ref1) (sym_ref2)
29373 where they are in the same section. */
29375 static bool
29376 arm_const_not_ok_for_debug_p (rtx p)
29377 {
29378 tree decl_op0 = NULL;
29379 tree decl_op1 = NULL;
29381 if (GET_CODE (p) == MINUS)
29382 {
29383 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29384 {
29385 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29386 if (decl_op1
29387 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29388 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29389 {
29390 if ((TREE_CODE (decl_op1) == VAR_DECL
29391 || TREE_CODE (decl_op1) == CONST_DECL)
29392 && (TREE_CODE (decl_op0) == VAR_DECL
29393 || TREE_CODE (decl_op0) == CONST_DECL))
29394 return (get_variable_section (decl_op1, false)
29395 != get_variable_section (decl_op0, false));
29397 if (TREE_CODE (decl_op1) == LABEL_DECL
29398 && TREE_CODE (decl_op0) == LABEL_DECL)
29399 return (DECL_CONTEXT (decl_op1)
29400 != DECL_CONTEXT (decl_op0));
29401 }
29403 return true;
29404 }
29405 }
29407 return false;
29408 }
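/* For example (illustrative): the difference of two SYMBOL_REFs whose
   decls live in different sections, say one in .data and one in .rodata,
   is reported as not OK for debug output, whereas the difference of two
   labels belonging to the same function is accepted.  */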
29410 /* Return TRUE if X is a reference to a value in a constant pool. */
29411 extern bool
29412 arm_is_constant_pool_ref (rtx x)
29413 {
29414 return (MEM_P (x)
29415 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29416 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29417 }
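/* Illustratively, this matches a MEM whose address is a literal-pool
   symbol, e.g. (mem (symbol_ref ".LC0")) with CONSTANT_POOL_ADDRESS_P
   set on the SYMBOL_REF; ".LC0" is only a placeholder label here.  */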
29419 /* If MEM is in the form of [base+offset], extract the two parts
29420 of the address and store them in BASE and OFFSET; otherwise return
29421 false after clearing BASE and OFFSET. */
29423 static bool
29424 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29425 {
29426 rtx addr;
29428 gcc_assert (MEM_P (mem));
29430 addr = XEXP (mem, 0);
29432 /* Strip off const from addresses like (const (addr)). */
29433 if (GET_CODE (addr) == CONST)
29434 addr = XEXP (addr, 0);
29436 if (GET_CODE (addr) == REG)
29437 {
29438 *base = addr;
29439 *offset = const0_rtx;
29440 return true;
29441 }
29443 if (GET_CODE (addr) == PLUS
29444 && GET_CODE (XEXP (addr, 0)) == REG
29445 && CONST_INT_P (XEXP (addr, 1)))
29446 {
29447 *base = XEXP (addr, 0);
29448 *offset = XEXP (addr, 1);
29449 return true;
29450 }
29452 *base = NULL_RTX;
29453 *offset = NULL_RTX;
29455 return false;
29456 }
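/* For example (illustrative RTL): (mem (plus (reg r4) (const_int 8)))
   sets *BASE to (reg r4) and *OFFSET to (const_int 8); a plain
   (mem (reg r4)) gets offset const0_rtx; anything else, such as a
   pre/post-modify address, makes the function return false.  */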
29458 /* If INSN is a load or store whose address is in the form [base+offset],
29459 extract the two parts and store them in BASE and OFFSET. IS_LOAD is
29460 set to TRUE if it is a load. Return TRUE if INSN is such an instruction,
29461 otherwise return FALSE. */
29463 static bool
29464 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29465 {
29466 rtx x, dest, src;
29468 gcc_assert (INSN_P (insn));
29469 x = PATTERN (insn);
29470 if (GET_CODE (x) != SET)
29471 return false;
29473 src = SET_SRC (x);
29474 dest = SET_DEST (x);
29475 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29476 {
29477 *is_load = false;
29478 extract_base_offset_in_addr (dest, base, offset);
29479 }
29480 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29481 {
29482 *is_load = true;
29483 extract_base_offset_in_addr (src, base, offset);
29484 }
29485 else
29486 return false;
29488 return (*base != NULL_RTX && *offset != NULL_RTX);
29489 }
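/* Illustration (assembly shown only as an example): for
     ldr r0, [r4, #4]
   this returns true with *IS_LOAD = true, *BASE = r4 and *OFFSET = 4,
   while a post-indexed store such as
     str r0, [r4], #4
   is rejected because its address is not of the [base+offset] form.  */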
29491 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29493 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29494 and PRI are only calculated for these instructions. For other instructions,
29495 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29496 of instruction fusion can be supported by returning different priorities.
29498 It's important that irrelevant instructions get the largest FUSION_PRI. */
29500 static void
29501 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29502 int *fusion_pri, int *pri)
29503 {
29504 int tmp, off_val;
29505 bool is_load;
29506 rtx base, offset;
29508 gcc_assert (INSN_P (insn));
29510 tmp = max_pri - 1;
29511 if (!fusion_load_store (insn, &base, &offset, &is_load))
29512 {
29513 *pri = tmp;
29514 *fusion_pri = tmp;
29515 return;
29516 }
29518 /* Load goes first. */
29519 if (is_load)
29520 *fusion_pri = tmp - 1;
29521 else
29522 *fusion_pri = tmp - 2;
29524 tmp /= 2;
29526 /* INSN with smaller base register goes first. */
29527 tmp -= ((REGNO (base) & 0xff) << 20);
29529 /* INSN with smaller offset goes first. */
29530 off_val = (int)(INTVAL (offset));
29531 if (off_val >= 0)
29532 tmp -= (off_val & 0xfffff);
29533 else
29534 tmp += ((- off_val) & 0xfffff);
29536 *pri = tmp;
29537 return;
29538 }
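/* Worked example (illustrative, with MAX_PRI taken to be 100): two loads
   from the same base register at offsets 0 and 4 both get
   *FUSION_PRI == 98, so the scheduler keeps them together, and their
   *PRI values differ only by the offset so the offset-0 load sorts
   first; any non load/store instruction simply gets 99 for both.  */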
29539 #include "gt-arm.h"