1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
 108 struct four_ints
 109 {
 110   int i[4];
 111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
 330 static const struct attribute_spec arm_attribute_table[] =
 331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
 359 multiple times.
 360 */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
 371   { NULL, 0, 0, false, false, false, NULL, false }
 372 };
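/* An illustrative sketch of how the attributes registered in the table
   above are spelled in user code (the function names here are
   hypothetical; the attribute names and arguments are the ones this
   table handles):

     // Callee may lie outside the 26-bit branch range, so force an
     // indirect call sequence.
     extern void far_away_fn (void) __attribute__ ((long_call));

     // Interrupt service routine; the prologue/epilogue preserve the
     // full interrupted context.  "IRQ" selects the interrupt kind.
     void irq_handler (void) __attribute__ ((isr ("IRQ")));

     // Select the VFP variant of the AAPCS for this one function.
     double scale (double x) __attribute__ ((pcs ("aapcs-vfp")));
*/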
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block
621 for a particular anchor is -4088 + 1 + 4095 bytes, which is
622 divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
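/* As a quick check of the arithmetic in the comment above: the anchor
   block runs from offset -4088 to +4095, i.e. 4088 + 1 + 4095 = 8184
   bytes, and 8184 = 8 * 1023, so successive anchors stay naturally
   spaced on eight-byte boundaries.  */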
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
 766 /* Which floating point hardware to use. */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
 857 XXX This is a bit of a hack; it's intended to help work around
 858 problems in GLD, which doesn't understand that armv5t code is
 859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if chip disallows volatile memory access in IT block. */
870 int arm_arch_no_volatile_ce;
 872 /* Nonzero if we should use Neon to handle 64-bit operations rather
 873 than core registers. */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 machine_mode output_memory_reference_mode;
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* Nonzero if the core has a very small, high-latency, multiply unit. */
908 int arm_m_profile_small_mul = 0;
910 /* The condition codes of the ARM, and the inverse function. */
 911 static const char * const arm_condition_codes[] =
 912 {
 913   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
 914   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
 915 };
917 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
 918 int arm_regs_in_sequence[] =
 919 {
 920   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 921 };
923 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924 #define streq(string1, string2) (strcmp (string1, string2) == 0)
926 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928 | (1 << PIC_OFFSET_TABLE_REGNUM)))
930 /* Initialization code. */
 932 struct processors
 933 {
 934   const char *const name;
 935   enum processor_type core;
 936   const char *arch;
 937   enum base_architecture base_arch;
 938   const unsigned long flags;
 939   const struct tune_params *const tune;
 940 };
943 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945 prefetch_slots, \
946 l1_size, \
947 l1_line_size
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
 963   1, /* cond_not_taken_branch_cost. */
 964 };
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1033 /* FP SFmode */
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1049 /* FP DFmode */
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1066 /* Vector */
1068 COSTS_N_INSNS (1) /* alu. */
1072 const struct cpu_cost_table cortexa8_extra_costs =
1074 /* ALU */
1076 0, /* arith. */
1077 0, /* logical. */
1078 COSTS_N_INSNS (1), /* shift. */
1079 0, /* shift_reg. */
1080 COSTS_N_INSNS (1), /* arith_shift. */
1081 0, /* arith_shift_reg. */
1082 COSTS_N_INSNS (1), /* log_shift. */
1083 0, /* log_shift_reg. */
1084 0, /* extend. */
1085 0, /* extend_arith. */
1086 0, /* bfi. */
1087 0, /* bfx. */
1088 0, /* clz. */
1089 0, /* rev. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1094 /* MULT SImode */
1096 COSTS_N_INSNS (1), /* simple. */
1097 COSTS_N_INSNS (1), /* flag_setting. */
1098 COSTS_N_INSNS (1), /* extend. */
1099 COSTS_N_INSNS (1), /* add. */
1100 COSTS_N_INSNS (1), /* extend_add. */
1101 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1103 /* MULT DImode */
1105 0, /* simple (N/A). */
1106 0, /* flag_setting (N/A). */
1107 COSTS_N_INSNS (2), /* extend. */
1108 0, /* add (N/A). */
1109 COSTS_N_INSNS (2), /* extend_add. */
1110 0 /* idiv (N/A). */
1113 /* LD/ST */
1115 COSTS_N_INSNS (1), /* load. */
1116 COSTS_N_INSNS (1), /* load_sign_extend. */
1117 COSTS_N_INSNS (1), /* ldrd. */
1118 COSTS_N_INSNS (1), /* ldm_1st. */
1119 1, /* ldm_regs_per_insn_1st. */
1120 2, /* ldm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* loadf. */
1122 COSTS_N_INSNS (1), /* loadd. */
1123 COSTS_N_INSNS (1), /* load_unaligned. */
1124 COSTS_N_INSNS (1), /* store. */
1125 COSTS_N_INSNS (1), /* strd. */
1126 COSTS_N_INSNS (1), /* stm_1st. */
1127 1, /* stm_regs_per_insn_1st. */
1128 2, /* stm_regs_per_insn_subsequent. */
1129 COSTS_N_INSNS (1), /* storef. */
1130 COSTS_N_INSNS (1), /* stored. */
1131 COSTS_N_INSNS (1) /* store_unaligned. */
1134 /* FP SFmode */
1136 COSTS_N_INSNS (36), /* div. */
1137 COSTS_N_INSNS (11), /* mult. */
1138 COSTS_N_INSNS (20), /* mult_addsub. */
1139 COSTS_N_INSNS (30), /* fma. */
1140 COSTS_N_INSNS (9), /* addsub. */
1141 COSTS_N_INSNS (3), /* fpconst. */
1142 COSTS_N_INSNS (3), /* neg. */
1143 COSTS_N_INSNS (6), /* compare. */
1144 COSTS_N_INSNS (4), /* widen. */
1145 COSTS_N_INSNS (4), /* narrow. */
1146 COSTS_N_INSNS (8), /* toint. */
1147 COSTS_N_INSNS (8), /* fromint. */
1148 COSTS_N_INSNS (8) /* roundint. */
1150 /* FP DFmode */
1152 COSTS_N_INSNS (64), /* div. */
1153 COSTS_N_INSNS (16), /* mult. */
1154 COSTS_N_INSNS (25), /* mult_addsub. */
1155 COSTS_N_INSNS (30), /* fma. */
1156 COSTS_N_INSNS (9), /* addsub. */
1157 COSTS_N_INSNS (3), /* fpconst. */
1158 COSTS_N_INSNS (3), /* neg. */
1159 COSTS_N_INSNS (6), /* compare. */
1160 COSTS_N_INSNS (6), /* widen. */
1161 COSTS_N_INSNS (6), /* narrow. */
1162 COSTS_N_INSNS (8), /* toint. */
1163 COSTS_N_INSNS (8), /* fromint. */
1164 COSTS_N_INSNS (8) /* roundint. */
1167 /* Vector */
1169 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa5_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (6), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (4), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1276 const struct cpu_cost_table cortexa7_extra_costs =
1278 /* ALU */
1280 0, /* arith. */
1281 0, /* logical. */
1282 COSTS_N_INSNS (1), /* shift. */
1283 COSTS_N_INSNS (1), /* shift_reg. */
1284 COSTS_N_INSNS (1), /* arith_shift. */
1285 COSTS_N_INSNS (1), /* arith_shift_reg. */
1286 COSTS_N_INSNS (1), /* log_shift. */
1287 COSTS_N_INSNS (1), /* log_shift_reg. */
1288 COSTS_N_INSNS (1), /* extend. */
1289 COSTS_N_INSNS (1), /* extend_arith. */
1290 COSTS_N_INSNS (1), /* bfi. */
1291 COSTS_N_INSNS (1), /* bfx. */
1292 COSTS_N_INSNS (1), /* clz. */
1293 COSTS_N_INSNS (1), /* rev. */
1294 0, /* non_exec. */
1295 true /* non_exec_costs_exec. */
1299 /* MULT SImode */
1301 0, /* simple. */
1302 COSTS_N_INSNS (1), /* flag_setting. */
1303 COSTS_N_INSNS (1), /* extend. */
1304 COSTS_N_INSNS (1), /* add. */
1305 COSTS_N_INSNS (1), /* extend_add. */
1306 COSTS_N_INSNS (7) /* idiv. */
1308 /* MULT DImode */
1310 0, /* simple (N/A). */
1311 0, /* flag_setting (N/A). */
1312 COSTS_N_INSNS (1), /* extend. */
1313 0, /* add. */
1314 COSTS_N_INSNS (2), /* extend_add. */
1315 0 /* idiv (N/A). */
1318 /* LD/ST */
1320 COSTS_N_INSNS (1), /* load. */
1321 COSTS_N_INSNS (1), /* load_sign_extend. */
1322 COSTS_N_INSNS (3), /* ldrd. */
1323 COSTS_N_INSNS (1), /* ldm_1st. */
1324 1, /* ldm_regs_per_insn_1st. */
1325 2, /* ldm_regs_per_insn_subsequent. */
1326 COSTS_N_INSNS (2), /* loadf. */
1327 COSTS_N_INSNS (2), /* loadd. */
1328 COSTS_N_INSNS (1), /* load_unaligned. */
1329 COSTS_N_INSNS (1), /* store. */
1330 COSTS_N_INSNS (3), /* strd. */
1331 COSTS_N_INSNS (1), /* stm_1st. */
1332 1, /* stm_regs_per_insn_1st. */
1333 2, /* stm_regs_per_insn_subsequent. */
1334 COSTS_N_INSNS (2), /* storef. */
1335 COSTS_N_INSNS (2), /* stored. */
1336 COSTS_N_INSNS (1) /* store_unaligned. */
1339 /* FP SFmode */
1341 COSTS_N_INSNS (15), /* div. */
1342 COSTS_N_INSNS (3), /* mult. */
1343 COSTS_N_INSNS (7), /* mult_addsub. */
1344 COSTS_N_INSNS (7), /* fma. */
1345 COSTS_N_INSNS (3), /* addsub. */
1346 COSTS_N_INSNS (3), /* fpconst. */
1347 COSTS_N_INSNS (3), /* neg. */
1348 COSTS_N_INSNS (3), /* compare. */
1349 COSTS_N_INSNS (3), /* widen. */
1350 COSTS_N_INSNS (3), /* narrow. */
1351 COSTS_N_INSNS (3), /* toint. */
1352 COSTS_N_INSNS (3), /* fromint. */
1353 COSTS_N_INSNS (3) /* roundint. */
1355 /* FP DFmode */
1357 COSTS_N_INSNS (30), /* div. */
1358 COSTS_N_INSNS (6), /* mult. */
1359 COSTS_N_INSNS (10), /* mult_addsub. */
1360 COSTS_N_INSNS (7), /* fma. */
1361 COSTS_N_INSNS (3), /* addsub. */
1362 COSTS_N_INSNS (3), /* fpconst. */
1363 COSTS_N_INSNS (3), /* neg. */
1364 COSTS_N_INSNS (3), /* compare. */
1365 COSTS_N_INSNS (3), /* widen. */
1366 COSTS_N_INSNS (3), /* narrow. */
1367 COSTS_N_INSNS (3), /* toint. */
1368 COSTS_N_INSNS (3), /* fromint. */
1369 COSTS_N_INSNS (3) /* roundint. */
1372 /* Vector */
1374 COSTS_N_INSNS (1) /* alu. */
1378 const struct cpu_cost_table cortexa12_extra_costs =
1380 /* ALU */
1382 0, /* arith. */
1383 0, /* logical. */
1384 0, /* shift. */
1385 COSTS_N_INSNS (1), /* shift_reg. */
1386 COSTS_N_INSNS (1), /* arith_shift. */
1387 COSTS_N_INSNS (1), /* arith_shift_reg. */
1388 COSTS_N_INSNS (1), /* log_shift. */
1389 COSTS_N_INSNS (1), /* log_shift_reg. */
1390 0, /* extend. */
1391 COSTS_N_INSNS (1), /* extend_arith. */
1392 0, /* bfi. */
1393 COSTS_N_INSNS (1), /* bfx. */
1394 COSTS_N_INSNS (1), /* clz. */
1395 COSTS_N_INSNS (1), /* rev. */
1396 0, /* non_exec. */
1397 true /* non_exec_costs_exec. */
1399 /* MULT SImode */
1402 COSTS_N_INSNS (2), /* simple. */
1403 COSTS_N_INSNS (3), /* flag_setting. */
1404 COSTS_N_INSNS (2), /* extend. */
1405 COSTS_N_INSNS (3), /* add. */
1406 COSTS_N_INSNS (2), /* extend_add. */
1407 COSTS_N_INSNS (18) /* idiv. */
1409 /* MULT DImode */
1411 0, /* simple (N/A). */
1412 0, /* flag_setting (N/A). */
1413 COSTS_N_INSNS (3), /* extend. */
1414 0, /* add (N/A). */
1415 COSTS_N_INSNS (3), /* extend_add. */
1416 0 /* idiv (N/A). */
1419 /* LD/ST */
1421 COSTS_N_INSNS (3), /* load. */
1422 COSTS_N_INSNS (3), /* load_sign_extend. */
1423 COSTS_N_INSNS (3), /* ldrd. */
1424 COSTS_N_INSNS (3), /* ldm_1st. */
1425 1, /* ldm_regs_per_insn_1st. */
1426 2, /* ldm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (3), /* loadf. */
1428 COSTS_N_INSNS (3), /* loadd. */
1429 0, /* load_unaligned. */
1430 0, /* store. */
1431 0, /* strd. */
1432 0, /* stm_1st. */
1433 1, /* stm_regs_per_insn_1st. */
1434 2, /* stm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (2), /* storef. */
1436 COSTS_N_INSNS (2), /* stored. */
1437 0 /* store_unaligned. */
1440 /* FP SFmode */
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1456 /* FP DFmode */
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1473 /* Vector */
1475 COSTS_N_INSNS (1) /* alu. */
1479 const struct cpu_cost_table cortexa15_extra_costs =
1481 /* ALU */
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1500 /* MULT SImode */
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1510 /* MULT DImode */
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1520 /* LD/ST */
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0 /* store_unaligned. */
1541 /* FP SFmode */
1543 COSTS_N_INSNS (17), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (5), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1557 /* FP DFmode */
1559 COSTS_N_INSNS (31), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (2), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1574 /* Vector */
1576 COSTS_N_INSNS (1) /* alu. */
1580 const struct cpu_cost_table v7m_extra_costs =
1582 /* ALU */
1584 0, /* arith. */
1585 0, /* logical. */
1586 0, /* shift. */
1587 0, /* shift_reg. */
1588 0, /* arith_shift. */
1589 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 0, /* log_shift. */
1591 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 0, /* extend. */
1593 COSTS_N_INSNS (1), /* extend_arith. */
1594 0, /* bfi. */
1595 0, /* bfx. */
1596 0, /* clz. */
1597 0, /* rev. */
1598 COSTS_N_INSNS (1), /* non_exec. */
1599 false /* non_exec_costs_exec. */
1602 /* MULT SImode */
1604 COSTS_N_INSNS (1), /* simple. */
1605 COSTS_N_INSNS (1), /* flag_setting. */
1606 COSTS_N_INSNS (2), /* extend. */
1607 COSTS_N_INSNS (1), /* add. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 COSTS_N_INSNS (8) /* idiv. */
1611 /* MULT DImode */
1613 0, /* simple (N/A). */
1614 0, /* flag_setting (N/A). */
1615 COSTS_N_INSNS (2), /* extend. */
1616 0, /* add (N/A). */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 0 /* idiv (N/A). */
1621 /* LD/ST */
1623 COSTS_N_INSNS (2), /* load. */
1624 0, /* load_sign_extend. */
1625 COSTS_N_INSNS (3), /* ldrd. */
1626 COSTS_N_INSNS (2), /* ldm_1st. */
1627 1, /* ldm_regs_per_insn_1st. */
1628 1, /* ldm_regs_per_insn_subsequent. */
1629 COSTS_N_INSNS (2), /* loadf. */
1630 COSTS_N_INSNS (3), /* loadd. */
1631 COSTS_N_INSNS (1), /* load_unaligned. */
1632 COSTS_N_INSNS (2), /* store. */
1633 COSTS_N_INSNS (3), /* strd. */
1634 COSTS_N_INSNS (2), /* stm_1st. */
1635 1, /* stm_regs_per_insn_1st. */
1636 1, /* stm_regs_per_insn_subsequent. */
1637 COSTS_N_INSNS (2), /* storef. */
1638 COSTS_N_INSNS (3), /* stored. */
1639 COSTS_N_INSNS (1) /* store_unaligned. */
1642 /* FP SFmode */
1644 COSTS_N_INSNS (7), /* div. */
1645 COSTS_N_INSNS (2), /* mult. */
1646 COSTS_N_INSNS (5), /* mult_addsub. */
1647 COSTS_N_INSNS (3), /* fma. */
1648 COSTS_N_INSNS (1), /* addsub. */
1649 0, /* fpconst. */
1650 0, /* neg. */
1651 0, /* compare. */
1652 0, /* widen. */
1653 0, /* narrow. */
1654 0, /* toint. */
1655 0, /* fromint. */
1656 0 /* roundint. */
1658 /* FP DFmode */
1660 COSTS_N_INSNS (15), /* div. */
1661 COSTS_N_INSNS (5), /* mult. */
1662 COSTS_N_INSNS (7), /* mult_addsub. */
1663 COSTS_N_INSNS (7), /* fma. */
1664 COSTS_N_INSNS (3), /* addsub. */
1665 0, /* fpconst. */
1666 0, /* neg. */
1667 0, /* compare. */
1668 0, /* widen. */
1669 0, /* narrow. */
1670 0, /* toint. */
1671 0, /* fromint. */
1672 0 /* roundint. */
1675 /* Vector */
1677 COSTS_N_INSNS (1) /* alu. */
1681 #define ARM_FUSE_NOTHING (0)
1682 #define ARM_FUSE_MOVW_MOVT (1 << 0)
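/* ARM_FUSE_MOVW_MOVT is intended to mark cores on which a movw/movt pair
   that materializes a 32-bit immediate (low half, then high half) should
   be kept adjacent so the scheduler can fuse it; the pairing itself is
   checked in aarch_macro_fusion_pair_p.  */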
1684 const struct tune_params arm_slowmul_tune =
1686 arm_slowmul_rtx_costs,
1687 NULL,
1688 NULL, /* Sched adj cost. */
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 true, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false, /* Prefer 32-bit encodings. */
1699 false, /* Prefer Neon for stringops. */
1700 8, /* Maximum insns to inline memset. */
1701 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1702 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1705 const struct tune_params arm_fastmul_tune =
1707 arm_fastmul_rtx_costs,
1708 NULL,
1709 NULL, /* Sched adj cost. */
1710 1, /* Constant limit. */
1711 5, /* Max cond insns. */
1712 ARM_PREFETCH_NOT_BENEFICIAL,
1713 true, /* Prefer constant pool. */
1714 arm_default_branch_cost,
1715 false, /* Prefer LDRD/STRD. */
1716 {true, true}, /* Prefer non short circuit. */
1717 &arm_default_vec_cost, /* Vectorizer costs. */
1718 false, /* Prefer Neon for 64-bits bitops. */
1719 false, false, /* Prefer 32-bit encodings. */
1720 false, /* Prefer Neon for stringops. */
1721 8, /* Maximum insns to inline memset. */
1722 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1723 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1726 /* StrongARM has early execution of branches, so a sequence that is worth
1727 skipping is shorter. Set max_insns_skipped to a lower value. */
1729 const struct tune_params arm_strongarm_tune =
1731 arm_fastmul_rtx_costs,
1732 NULL,
1733 NULL, /* Sched adj cost. */
1734 1, /* Constant limit. */
1735 3, /* Max cond insns. */
1736 ARM_PREFETCH_NOT_BENEFICIAL,
1737 true, /* Prefer constant pool. */
1738 arm_default_branch_cost,
1739 false, /* Prefer LDRD/STRD. */
1740 {true, true}, /* Prefer non short circuit. */
1741 &arm_default_vec_cost, /* Vectorizer costs. */
1742 false, /* Prefer Neon for 64-bits bitops. */
1743 false, false, /* Prefer 32-bit encodings. */
1744 false, /* Prefer Neon for stringops. */
1745 8, /* Maximum insns to inline memset. */
1746 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1747 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1750 const struct tune_params arm_xscale_tune =
1752 arm_xscale_rtx_costs,
1753 NULL,
1754 xscale_sched_adjust_cost,
1755 2, /* Constant limit. */
1756 3, /* Max cond insns. */
1757 ARM_PREFETCH_NOT_BENEFICIAL,
1758 true, /* Prefer constant pool. */
1759 arm_default_branch_cost,
1760 false, /* Prefer LDRD/STRD. */
1761 {true, true}, /* Prefer non short circuit. */
1762 &arm_default_vec_cost, /* Vectorizer costs. */
1763 false, /* Prefer Neon for 64-bits bitops. */
1764 false, false, /* Prefer 32-bit encodings. */
1765 false, /* Prefer Neon for stringops. */
1766 8, /* Maximum insns to inline memset. */
1767 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1768 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1771 const struct tune_params arm_9e_tune =
1773 arm_9e_rtx_costs,
1774 NULL,
1775 NULL, /* Sched adj cost. */
1776 1, /* Constant limit. */
1777 5, /* Max cond insns. */
1778 ARM_PREFETCH_NOT_BENEFICIAL,
1779 true, /* Prefer constant pool. */
1780 arm_default_branch_cost,
1781 false, /* Prefer LDRD/STRD. */
1782 {true, true}, /* Prefer non short circuit. */
1783 &arm_default_vec_cost, /* Vectorizer costs. */
1784 false, /* Prefer Neon for 64-bits bitops. */
1785 false, false, /* Prefer 32-bit encodings. */
1786 false, /* Prefer Neon for stringops. */
1787 8, /* Maximum insns to inline memset. */
1788 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1789 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1792 const struct tune_params arm_v6t2_tune =
1794 arm_9e_rtx_costs,
1795 NULL,
1796 NULL, /* Sched adj cost. */
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 ARM_PREFETCH_NOT_BENEFICIAL,
1800 false, /* Prefer constant pool. */
1801 arm_default_branch_cost,
1802 false, /* Prefer LDRD/STRD. */
1803 {true, true}, /* Prefer non short circuit. */
1804 &arm_default_vec_cost, /* Vectorizer costs. */
1805 false, /* Prefer Neon for 64-bits bitops. */
1806 false, false, /* Prefer 32-bit encodings. */
1807 false, /* Prefer Neon for stringops. */
1808 8, /* Maximum insns to inline memset. */
1809 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1810 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1813 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1814 const struct tune_params arm_cortex_tune =
1816 arm_9e_rtx_costs,
1817 &generic_extra_costs,
1818 NULL, /* Sched adj cost. */
1819 1, /* Constant limit. */
1820 5, /* Max cond insns. */
1821 ARM_PREFETCH_NOT_BENEFICIAL,
1822 false, /* Prefer constant pool. */
1823 arm_default_branch_cost,
1824 false, /* Prefer LDRD/STRD. */
1825 {true, true}, /* Prefer non short circuit. */
1826 &arm_default_vec_cost, /* Vectorizer costs. */
1827 false, /* Prefer Neon for 64-bits bitops. */
1828 false, false, /* Prefer 32-bit encodings. */
1829 false, /* Prefer Neon for stringops. */
1830 8, /* Maximum insns to inline memset. */
1831 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1832 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1835 const struct tune_params arm_cortex_a8_tune =
1837 arm_9e_rtx_costs,
1838 &cortexa8_extra_costs,
1839 NULL, /* Sched adj cost. */
1840 1, /* Constant limit. */
1841 5, /* Max cond insns. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 false, /* Prefer constant pool. */
1844 arm_default_branch_cost,
1845 false, /* Prefer LDRD/STRD. */
1846 {true, true}, /* Prefer non short circuit. */
1847 &arm_default_vec_cost, /* Vectorizer costs. */
1848 false, /* Prefer Neon for 64-bits bitops. */
1849 false, false, /* Prefer 32-bit encodings. */
1850 true, /* Prefer Neon for stringops. */
1851 8, /* Maximum insns to inline memset. */
1852 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1853 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1856 const struct tune_params arm_cortex_a7_tune =
1858 arm_9e_rtx_costs,
1859 &cortexa7_extra_costs,
1860 NULL,
1861 1, /* Constant limit. */
1862 5, /* Max cond insns. */
1863 ARM_PREFETCH_NOT_BENEFICIAL,
1864 false, /* Prefer constant pool. */
1865 arm_default_branch_cost,
1866 false, /* Prefer LDRD/STRD. */
1867 {true, true}, /* Prefer non short circuit. */
1868 &arm_default_vec_cost, /* Vectorizer costs. */
1869 false, /* Prefer Neon for 64-bits bitops. */
1870 false, false, /* Prefer 32-bit encodings. */
1871 true, /* Prefer Neon for stringops. */
1872 8, /* Maximum insns to inline memset. */
1873 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1874 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1877 const struct tune_params arm_cortex_a15_tune =
1879 arm_9e_rtx_costs,
1880 &cortexa15_extra_costs,
1881 NULL, /* Sched adj cost. */
1882 1, /* Constant limit. */
1883 2, /* Max cond insns. */
1884 ARM_PREFETCH_NOT_BENEFICIAL,
1885 false, /* Prefer constant pool. */
1886 arm_default_branch_cost,
1887 true, /* Prefer LDRD/STRD. */
1888 {true, true}, /* Prefer non short circuit. */
1889 &arm_default_vec_cost, /* Vectorizer costs. */
1890 false, /* Prefer Neon for 64-bits bitops. */
1891 true, true, /* Prefer 32-bit encodings. */
1892 true, /* Prefer Neon for stringops. */
1893 8, /* Maximum insns to inline memset. */
1894 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1895 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1898 const struct tune_params arm_cortex_a53_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa53_extra_costs,
1902 NULL, /* Scheduler cost adjustment. */
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_NOT_BENEFICIAL,
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 true, /* Prefer Neon for stringops. */
1914 8, /* Maximum insns to inline memset. */
1915 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1916 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1919 const struct tune_params arm_cortex_a57_tune =
1921 arm_9e_rtx_costs,
1922 &cortexa57_extra_costs,
1923 NULL, /* Scheduler cost adjustment. */
1924 1, /* Constant limit. */
1925 2, /* Max cond insns. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 false, /* Prefer constant pool. */
1928 arm_default_branch_cost,
1929 true, /* Prefer LDRD/STRD. */
1930 {true, true}, /* Prefer non short circuit. */
1931 &arm_default_vec_cost, /* Vectorizer costs. */
1932 false, /* Prefer Neon for 64-bits bitops. */
1933 true, true, /* Prefer 32-bit encodings. */
1934 true, /* Prefer Neon for stringops. */
1935 8, /* Maximum insns to inline memset. */
1936 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1937 ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */
1940 const struct tune_params arm_xgene1_tune =
1942 arm_9e_rtx_costs,
1943 &xgene1_extra_costs,
1944 NULL, /* Scheduler cost adjustment. */
1945 1, /* Constant limit. */
1946 2, /* Max cond insns. */
1947 ARM_PREFETCH_NOT_BENEFICIAL,
1948 false, /* Prefer constant pool. */
1949 arm_default_branch_cost,
1950 true, /* Prefer LDRD/STRD. */
1951 {true, true}, /* Prefer non short circuit. */
1952 &arm_default_vec_cost, /* Vectorizer costs. */
1953 false, /* Prefer Neon for 64-bits bitops. */
1954 true, true, /* Prefer 32-bit encodings. */
1955 false, /* Prefer Neon for stringops. */
1956 32, /* Maximum insns to inline memset. */
1957 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1958 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1961 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1962 less appealing. Set max_insns_skipped to a low value. */
1964 const struct tune_params arm_cortex_a5_tune =
1966 arm_9e_rtx_costs,
1967 &cortexa5_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 1, /* Constant limit. */
1970 1, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 false, /* Prefer constant pool. */
1973 arm_cortex_a5_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {false, false}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 true, /* Prefer Neon for stringops. */
1980 8, /* Maximum insns to inline memset. */
1981 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1982 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
1985 const struct tune_params arm_cortex_a9_tune =
1987 arm_9e_rtx_costs,
1988 &cortexa9_extra_costs,
1989 cortex_a9_sched_adjust_cost,
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_BENEFICIAL(4,32,32),
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {true, true}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8, /* Maximum insns to inline memset. */
2002 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2003 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2006 const struct tune_params arm_cortex_a12_tune =
2008 arm_9e_rtx_costs,
2009 &cortexa12_extra_costs,
2010 NULL, /* Sched adj cost. */
2011 1, /* Constant limit. */
2012 2, /* Max cond insns. */
2013 ARM_PREFETCH_NOT_BENEFICIAL,
2014 false, /* Prefer constant pool. */
2015 arm_default_branch_cost,
2016 true, /* Prefer LDRD/STRD. */
2017 {true, true}, /* Prefer non short circuit. */
2018 &arm_default_vec_cost, /* Vectorizer costs. */
2019 false, /* Prefer Neon for 64-bits bitops. */
2020 true, true, /* Prefer 32-bit encodings. */
2021 true, /* Prefer Neon for stringops. */
2022 8, /* Maximum insns to inline memset. */
2023 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2024 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2027 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2028 single cycle, so materialising a constant with the pair costs two cycles.
2029 An LDR from the constant pool likewise takes two cycles, but mildly
2030 increases pipelining opportunity (consecutive loads/stores can be
2031 pipelined together, saving one cycle), and may also improve icache
2032 utilisation.  Hence we prefer the constant pool for such processors.  */
2034 const struct tune_params arm_v7m_tune =
2036 arm_9e_rtx_costs,
2037 &v7m_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 ARM_PREFETCH_NOT_BENEFICIAL,
2042 true, /* Prefer constant pool. */
2043 arm_cortex_m_branch_cost,
2044 false, /* Prefer LDRD/STRD. */
2045 {false, false}, /* Prefer non short circuit. */
2046 &arm_default_vec_cost, /* Vectorizer costs. */
2047 false, /* Prefer Neon for 64-bits bitops. */
2048 false, false, /* Prefer 32-bit encodings. */
2049 false, /* Prefer Neon for stringops. */
2050 8, /* Maximum insns to inline memset. */
2051 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2052 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2055 /* Cortex-M7 tuning. */
2057 const struct tune_params arm_cortex_m7_tune =
2059 arm_9e_rtx_costs,
2060 &v7m_extra_costs,
2061 NULL, /* Sched adj cost. */
2062 0, /* Constant limit. */
2063 1, /* Max cond insns. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 true, /* Prefer constant pool. */
2066 arm_cortex_m7_branch_cost,
2067 false, /* Prefer LDRD/STRD. */
2068 {true, true}, /* Prefer non short circuit. */
2069 &arm_default_vec_cost, /* Vectorizer costs. */
2070 false, /* Prefer Neon for 64-bits bitops. */
2071 false, false, /* Prefer 32-bit encodings. */
2072 false, /* Prefer Neon for stringops. */
2073 8, /* Maximum insns to inline memset. */
2074 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2075 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2078 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2079 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2080 const struct tune_params arm_v6m_tune =
2082 arm_9e_rtx_costs,
2083 NULL,
2084 NULL, /* Sched adj cost. */
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 ARM_PREFETCH_NOT_BENEFICIAL,
2088 false, /* Prefer constant pool. */
2089 arm_default_branch_cost,
2090 false, /* Prefer LDRD/STRD. */
2091 {false, false}, /* Prefer non short circuit. */
2092 &arm_default_vec_cost, /* Vectorizer costs. */
2093 false, /* Prefer Neon for 64-bits bitops. */
2094 false, false, /* Prefer 32-bit encodings. */
2095 false, /* Prefer Neon for stringops. */
2096 8, /* Maximum insns to inline memset. */
2097 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2098 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2101 const struct tune_params arm_fa726te_tune =
2103 arm_9e_rtx_costs,
2104 NULL,
2105 fa726te_sched_adjust_cost,
2106 1, /* Constant limit. */
2107 5, /* Max cond insns. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 true, /* Prefer constant pool. */
2110 arm_default_branch_cost,
2111 false, /* Prefer LDRD/STRD. */
2112 {true, true}, /* Prefer non short circuit. */
2113 &arm_default_vec_cost, /* Vectorizer costs. */
2114 false, /* Prefer Neon for 64-bits bitops. */
2115 false, false, /* Prefer 32-bit encodings. */
2116 false, /* Prefer Neon for stringops. */
2117 8, /* Maximum insns to inline memset. */
2118 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2119 ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */
2123 /* Not all of these give usefully different compilation alternatives,
2124 but there is no simple way of generalizing them. */
2125 static const struct processors all_cores[] =
2127 /* ARM Cores */
2128 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2129 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2130 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2131 #include "arm-cores.def"
2132 #undef ARM_CORE
2133 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
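/* As an illustration of the ARM_CORE macro above (using a made-up entry,
   not one from arm-cores.def):
     ARM_CORE ("foo", foo_x, foo_ident, 7A, FL_LDSCHED, cortex)
   would expand to
     {"foo", foo_ident, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
      &arm_cortex_tune},
   i.e. the second argument is unused in this expansion, and the COSTS
   argument is pasted into arm_<COSTS>_tune to select one of the tuning
   structures defined earlier in this file.  */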
2136 static const struct processors all_architectures[] =
2138 /* ARM Architectures */
2139 /* We don't specify tuning costs here as it will be figured out
2140 from the core. */
2142 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2143 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2144 #include "arm-arches.def"
2145 #undef ARM_ARCH
2146 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2150 /* These are populated as commandline arguments are processed, or NULL
2151 if not specified. */
2152 static const struct processors *arm_selected_arch;
2153 static const struct processors *arm_selected_cpu;
2154 static const struct processors *arm_selected_tune;
2156 /* The name of the preprocessor macro to define for this architecture. */
2158 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2160 /* Available values for -mfpu=. */
2162 static const struct arm_fpu_desc all_fpus[] =
2164 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2165 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2166 #include "arm-fpus.def"
2167 #undef ARM_FPU
2171 /* Supported TLS relocations. */
2173 enum tls_reloc {
2174 TLS_GD32,
2175 TLS_LDM32,
2176 TLS_LDO32,
2177 TLS_IE32,
2178 TLS_LE32,
2179 TLS_DESCSEQ /* GNU scheme */
2182 /* The maximum number of insns to be used when loading a constant. */
2183 inline static int
2184 arm_constant_limit (bool size_p)
2186 return size_p ? 1 : current_tune->constant_limit;
2189 /* Emit an insn that's a simple single-set. Both the operands must be known
2190 to be valid. */
2191 inline static rtx_insn *
2192 emit_set_insn (rtx x, rtx y)
2194 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2197 /* Return the number of bits set in VALUE. */
2198 static unsigned
2199 bit_count (unsigned long value)
2201 unsigned long count = 0;
2203 while (value)
2205 count++;
2206 value &= value - 1; /* Clear the least-significant set bit. */
2209 return count;
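/* For illustration: bit_count works on the raw bit pattern, so
   bit_count (0x2c) == 3 and bit_count (0) == 0.  arm_option_override below
   uses it to score how many of the default CPU's feature bits a candidate
   core retains when searching for a best-fit processor.  */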
2212 typedef struct
2214 machine_mode mode;
2215 const char *name;
2216 } arm_fixed_mode_set;
2218 /* A small helper for setting fixed-point library libfuncs. */
2220 static void
2221 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2222 const char *funcname, const char *modename,
2223 int num_suffix)
2225 char buffer[50];
2227 if (num_suffix == 0)
2228 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2229 else
2230 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2232 set_optab_libfunc (optable, mode, buffer);
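/* A sketch of the names this helper constructs, following the sprintf
   format above and the calls made later in arm_init_libfuncs:
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
       registers "__gnu_addqq3", and
     arm_set_fixed_optab_libfunc (neg_optab, HAmode, "neg", "ha", 2)
       registers "__gnu_negha2";
   a num_suffix of 0 omits the trailing digit entirely.  */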
2235 static void
2236 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2237 machine_mode from, const char *funcname,
2238 const char *toname, const char *fromname)
2240 char buffer[50];
2241 const char *maybe_suffix_2 = "";
2243 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2244 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2245 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2246 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2247 maybe_suffix_2 = "2";
2249 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2250 maybe_suffix_2);
2252 set_conv_libfunc (optable, to, from, buffer);
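/* Likewise, a sketch of the conversion names: converting SImode to SAmode
   ("fract", from "si" to "sa") registers "__gnu_fractsisa", with no "2"
   suffix because the source is not a fixed-point mode, while converting
   QQmode to HQmode (both signed fract modes) registers "__gnu_fractqqhq2",
   matching the fixed-bit.h suffix rule noted above.  */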
2255 /* Set up library functions unique to ARM. */
2257 static void
2258 arm_init_libfuncs (void)
2260 /* For Linux, we have access to kernel support for atomic operations. */
2261 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2262 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2264 /* There are no special library functions unless we are using the
2265 ARM BPABI. */
2266 if (!TARGET_BPABI)
2267 return;
2269 /* The functions below are described in Section 4 of the "Run-Time
2270 ABI for the ARM architecture", Version 1.0. */
2272 /* Double-precision floating-point arithmetic. Table 2. */
2273 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2274 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2275 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2276 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2277 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2279 /* Double-precision comparisons. Table 3. */
2280 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2281 set_optab_libfunc (ne_optab, DFmode, NULL);
2282 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2283 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2284 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2285 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2286 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2288 /* Single-precision floating-point arithmetic. Table 4. */
2289 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2290 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2291 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2292 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2293 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2295 /* Single-precision comparisons. Table 5. */
2296 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2297 set_optab_libfunc (ne_optab, SFmode, NULL);
2298 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2299 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2300 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2301 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2302 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2304 /* Floating-point to integer conversions. Table 6. */
2305 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2306 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2307 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2308 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2309 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2310 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2311 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2312 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2314 /* Conversions between floating types. Table 7. */
2315 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2316 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2318 /* Integer to floating-point conversions. Table 8. */
2319 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2320 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2321 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2322 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2323 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2324 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2325 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2326 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2328 /* Long long. Table 9. */
2329 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2330 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2331 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2332 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2333 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2334 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2335 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2336 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2338 /* Integer (32/32->32) division. \S 4.3.1. */
2339 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2340 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2342 /* The divmod functions are designed so that they can be used for
2343 plain division, even though they return both the quotient and the
2344 remainder. The quotient is returned in the usual location (i.e.,
2345 r0 for SImode, {r0, r1} for DImode), just as would be expected
2346 for an ordinary division routine. Because the AAPCS calling
2347 conventions specify that all of { r0, r1, r2, r3 } are
2348 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2349 explicitly that those registers are clobbered by these
2350 routines. */
2351 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2352 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
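/* Thus a plain 64-bit division, say "a / b" on long long operands, can be
   compiled as a call to __aeabi_ldivmod (or __aeabi_uldivmod when
   unsigned), with the quotient read from {r0, r1} and the remainder part
   of the combined result simply ignored, as the comment above explains.  */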
2354 /* For SImode division the ABI provides div-without-mod routines,
2355 which are faster. */
2356 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2357 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2359 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2360 divmod libcalls instead. */
2361 set_optab_libfunc (smod_optab, DImode, NULL);
2362 set_optab_libfunc (umod_optab, DImode, NULL);
2363 set_optab_libfunc (smod_optab, SImode, NULL);
2364 set_optab_libfunc (umod_optab, SImode, NULL);
2366 /* Half-precision float operations. The compiler handles all operations
2367 with NULL libfuncs by converting to SFmode. */
2368 switch (arm_fp16_format)
2370 case ARM_FP16_FORMAT_IEEE:
2371 case ARM_FP16_FORMAT_ALTERNATIVE:
2373 /* Conversions. */
2374 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2375 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2376 ? "__gnu_f2h_ieee"
2377 : "__gnu_f2h_alternative"));
2378 set_conv_libfunc (sext_optab, SFmode, HFmode,
2379 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2380 ? "__gnu_h2f_ieee"
2381 : "__gnu_h2f_alternative"));
2383 /* Arithmetic. */
2384 set_optab_libfunc (add_optab, HFmode, NULL);
2385 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2386 set_optab_libfunc (smul_optab, HFmode, NULL);
2387 set_optab_libfunc (neg_optab, HFmode, NULL);
2388 set_optab_libfunc (sub_optab, HFmode, NULL);
2390 /* Comparisons. */
2391 set_optab_libfunc (eq_optab, HFmode, NULL);
2392 set_optab_libfunc (ne_optab, HFmode, NULL);
2393 set_optab_libfunc (lt_optab, HFmode, NULL);
2394 set_optab_libfunc (le_optab, HFmode, NULL);
2395 set_optab_libfunc (ge_optab, HFmode, NULL);
2396 set_optab_libfunc (gt_optab, HFmode, NULL);
2397 set_optab_libfunc (unord_optab, HFmode, NULL);
2398 break;
2400 default:
2401 break;
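/* A sketch of the effect, assuming -mfp16-format=ieee and no half-precision
   hardware: because the HFmode arithmetic and comparison optabs above are
   left NULL, an addition of two __fp16 values a and b is performed roughly
   as
     __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b))
   i.e. the operands are widened to SFmode, the operation is carried out
   there, and the result is narrowed back.  */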
2404 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2406 const arm_fixed_mode_set fixed_arith_modes[] =
2408 { QQmode, "qq" },
2409 { UQQmode, "uqq" },
2410 { HQmode, "hq" },
2411 { UHQmode, "uhq" },
2412 { SQmode, "sq" },
2413 { USQmode, "usq" },
2414 { DQmode, "dq" },
2415 { UDQmode, "udq" },
2416 { TQmode, "tq" },
2417 { UTQmode, "utq" },
2418 { HAmode, "ha" },
2419 { UHAmode, "uha" },
2420 { SAmode, "sa" },
2421 { USAmode, "usa" },
2422 { DAmode, "da" },
2423 { UDAmode, "uda" },
2424 { TAmode, "ta" },
2425 { UTAmode, "uta" }
2427 const arm_fixed_mode_set fixed_conv_modes[] =
2429 { QQmode, "qq" },
2430 { UQQmode, "uqq" },
2431 { HQmode, "hq" },
2432 { UHQmode, "uhq" },
2433 { SQmode, "sq" },
2434 { USQmode, "usq" },
2435 { DQmode, "dq" },
2436 { UDQmode, "udq" },
2437 { TQmode, "tq" },
2438 { UTQmode, "utq" },
2439 { HAmode, "ha" },
2440 { UHAmode, "uha" },
2441 { SAmode, "sa" },
2442 { USAmode, "usa" },
2443 { DAmode, "da" },
2444 { UDAmode, "uda" },
2445 { TAmode, "ta" },
2446 { UTAmode, "uta" },
2447 { QImode, "qi" },
2448 { HImode, "hi" },
2449 { SImode, "si" },
2450 { DImode, "di" },
2451 { TImode, "ti" },
2452 { SFmode, "sf" },
2453 { DFmode, "df" }
2455 unsigned int i, j;
2457 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2459 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2460 "add", fixed_arith_modes[i].name, 3);
2461 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2462 "ssadd", fixed_arith_modes[i].name, 3);
2463 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2464 "usadd", fixed_arith_modes[i].name, 3);
2465 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2466 "sub", fixed_arith_modes[i].name, 3);
2467 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2468 "sssub", fixed_arith_modes[i].name, 3);
2469 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2470 "ussub", fixed_arith_modes[i].name, 3);
2471 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2472 "mul", fixed_arith_modes[i].name, 3);
2473 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2474 "ssmul", fixed_arith_modes[i].name, 3);
2475 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2476 "usmul", fixed_arith_modes[i].name, 3);
2477 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2478 "div", fixed_arith_modes[i].name, 3);
2479 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2480 "udiv", fixed_arith_modes[i].name, 3);
2481 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2482 "ssdiv", fixed_arith_modes[i].name, 3);
2483 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2484 "usdiv", fixed_arith_modes[i].name, 3);
2485 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2486 "neg", fixed_arith_modes[i].name, 2);
2487 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2488 "ssneg", fixed_arith_modes[i].name, 2);
2489 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2490 "usneg", fixed_arith_modes[i].name, 2);
2491 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2492 "ashl", fixed_arith_modes[i].name, 3);
2493 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2494 "ashr", fixed_arith_modes[i].name, 3);
2495 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2496 "lshr", fixed_arith_modes[i].name, 3);
2497 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2498 "ssashl", fixed_arith_modes[i].name, 3);
2499 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2500 "usashl", fixed_arith_modes[i].name, 3);
2501 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2502 "cmp", fixed_arith_modes[i].name, 2);
2505 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2506 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2508 if (i == j
2509 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2510 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2511 continue;
2513 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2514 fixed_conv_modes[j].mode, "fract",
2515 fixed_conv_modes[i].name,
2516 fixed_conv_modes[j].name);
2517 arm_set_fixed_conv_libfunc (satfract_optab,
2518 fixed_conv_modes[i].mode,
2519 fixed_conv_modes[j].mode, "satfract",
2520 fixed_conv_modes[i].name,
2521 fixed_conv_modes[j].name);
2522 arm_set_fixed_conv_libfunc (fractuns_optab,
2523 fixed_conv_modes[i].mode,
2524 fixed_conv_modes[j].mode, "fractuns",
2525 fixed_conv_modes[i].name,
2526 fixed_conv_modes[j].name);
2527 arm_set_fixed_conv_libfunc (satfractuns_optab,
2528 fixed_conv_modes[i].mode,
2529 fixed_conv_modes[j].mode, "satfractuns",
2530 fixed_conv_modes[i].name,
2531 fixed_conv_modes[j].name);
2535 if (TARGET_AAPCS_BASED)
2536 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2539 /* On AAPCS systems, this is the "struct __va_list". */
2540 static GTY(()) tree va_list_type;
2542 /* Return the type to use as __builtin_va_list. */
2543 static tree
2544 arm_build_builtin_va_list (void)
2546 tree va_list_name;
2547 tree ap_field;
2549 if (!TARGET_AAPCS_BASED)
2550 return std_build_builtin_va_list ();
2552 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2553 defined as:
2555 struct __va_list
2557 void *__ap;
2560 The C Library ABI further reinforces this definition in \S
2561 4.1.
2563 We must follow this definition exactly. The structure tag
2564 name is visible in C++ mangled names, and thus forms a part
2565 of the ABI. The field name may be used by people who
2566 #include <stdarg.h>. */
2567 /* Create the type. */
2568 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2569 /* Give it the required name. */
2570 va_list_name = build_decl (BUILTINS_LOCATION,
2571 TYPE_DECL,
2572 get_identifier ("__va_list"),
2573 va_list_type);
2574 DECL_ARTIFICIAL (va_list_name) = 1;
2575 TYPE_NAME (va_list_type) = va_list_name;
2576 TYPE_STUB_DECL (va_list_type) = va_list_name;
2577 /* Create the __ap field. */
2578 ap_field = build_decl (BUILTINS_LOCATION,
2579 FIELD_DECL,
2580 get_identifier ("__ap"),
2581 ptr_type_node);
2582 DECL_ARTIFICIAL (ap_field) = 1;
2583 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2584 TYPE_FIELDS (va_list_type) = ap_field;
2585 /* Compute its layout. */
2586 layout_type (va_list_type);
2588 return va_list_type;
2591 /* Return an expression of type "void *" pointing to the next
2592 available argument in a variable-argument list. VALIST is the
2593 user-level va_list object, of type __builtin_va_list. */
2594 static tree
2595 arm_extract_valist_ptr (tree valist)
2597 if (TREE_TYPE (valist) == error_mark_node)
2598 return error_mark_node;
2600 /* On an AAPCS target, the pointer is stored within "struct
2601 va_list". */
2602 if (TARGET_AAPCS_BASED)
2604 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2605 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2606 valist, ap_field, NULL_TREE);
2609 return valist;
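/* In other words, on an AAPCS target the tree returned above is the
   COMPONENT_REF "valist.__ap", so the standard va_start and va_arg
   expanders used below operate on the plain pointer stored inside the
   structure.  */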
2612 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2613 static void
2614 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2616 valist = arm_extract_valist_ptr (valist);
2617 std_expand_builtin_va_start (valist, nextarg);
2620 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2621 static tree
2622 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2623 gimple_seq *post_p)
2625 valist = arm_extract_valist_ptr (valist);
2626 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2629 /* Fix up any incompatible options that the user has specified. */
2630 static void
2631 arm_option_override (void)
2633 arm_selected_arch = NULL;
2634 arm_selected_cpu = NULL;
2635 arm_selected_tune = NULL;
2637 if (global_options_set.x_arm_arch_option)
2638 arm_selected_arch = &all_architectures[arm_arch_option];
2640 if (global_options_set.x_arm_cpu_option)
2642 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2643 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2646 if (global_options_set.x_arm_tune_option)
2647 arm_selected_tune = &all_cores[(int) arm_tune_option];
2649 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2650 SUBTARGET_OVERRIDE_OPTIONS;
2651 #endif
2653 if (arm_selected_arch)
2655 if (arm_selected_cpu)
2657 /* Check for conflict between mcpu and march. */
2658 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2660 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2661 arm_selected_cpu->name, arm_selected_arch->name);
2662 /* -march wins for code generation.
2663 -mcpu wins for default tuning. */
2664 if (!arm_selected_tune)
2665 arm_selected_tune = arm_selected_cpu;
2667 arm_selected_cpu = arm_selected_arch;
2669 else
2670 /* -mcpu wins. */
2671 arm_selected_arch = NULL;
2673 else
2674 /* Pick a CPU based on the architecture. */
2675 arm_selected_cpu = arm_selected_arch;
2678 /* If the user did not specify a processor, choose one for them. */
2679 if (!arm_selected_cpu)
2681 const struct processors * sel;
2682 unsigned int sought;
2684 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2685 if (!arm_selected_cpu->name)
2687 #ifdef SUBTARGET_CPU_DEFAULT
2688 /* Use the subtarget default CPU if none was specified by
2689 configure. */
2690 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2691 #endif
2692 /* Default to ARM6. */
2693 if (!arm_selected_cpu->name)
2694 arm_selected_cpu = &all_cores[arm6];
2697 sel = arm_selected_cpu;
2698 insn_flags = sel->flags;
2700 /* Now check to see if the user has specified some command line
2701 switches that require certain abilities from the cpu. */
2702 sought = 0;
2704 if (TARGET_INTERWORK || TARGET_THUMB)
2706 sought |= (FL_THUMB | FL_MODE32);
2708 /* There are no ARM processors that support both APCS-26 and
2709 interworking. Therefore we force FL_MODE26 to be removed
2710 from insn_flags here (if it was set), so that the search
2711 below will always be able to find a compatible processor. */
2712 insn_flags &= ~FL_MODE26;
2715 if (sought != 0 && ((sought & insn_flags) != sought))
2717 /* Try to locate a CPU type that supports all of the abilities
2718 of the default CPU, plus the extra abilities requested by
2719 the user. */
2720 for (sel = all_cores; sel->name != NULL; sel++)
2721 if ((sel->flags & sought) == (sought | insn_flags))
2722 break;
2724 if (sel->name == NULL)
2726 unsigned current_bit_count = 0;
2727 const struct processors * best_fit = NULL;
2729 /* Ideally we would like to issue an error message here
2730 saying that it was not possible to find a CPU compatible
2731 with the default CPU, but which also supports the command
2732 line options specified by the programmer, and so they
2733 ought to use the -mcpu=<name> command line option to
2734 override the default CPU type.
2736 If we cannot find a cpu that has both the
2737 characteristics of the default cpu and the given
2738 command line options we scan the array again looking
2739 for a best match. */
2740 for (sel = all_cores; sel->name != NULL; sel++)
2741 if ((sel->flags & sought) == sought)
2743 unsigned count;
2745 count = bit_count (sel->flags & insn_flags);
2747 if (count >= current_bit_count)
2749 best_fit = sel;
2750 current_bit_count = count;
2754 gcc_assert (best_fit);
2755 sel = best_fit;
2758 arm_selected_cpu = sel;
2762 gcc_assert (arm_selected_cpu);
2763 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2764 if (!arm_selected_tune)
2765 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2767 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2768 insn_flags = arm_selected_cpu->flags;
2769 arm_base_arch = arm_selected_cpu->base_arch;
2771 arm_tune = arm_selected_tune->core;
2772 tune_flags = arm_selected_tune->flags;
2773 current_tune = arm_selected_tune->tune;
2775 /* Make sure that the processor choice does not conflict with any of the
2776 other command line choices. */
2777 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2778 error ("target CPU does not support ARM mode");
2780 /* BPABI targets use linker tricks to allow interworking on cores
2781 without thumb support. */
2782 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2784 warning (0, "target CPU does not support interworking" );
2785 target_flags &= ~MASK_INTERWORK;
2788 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2790 warning (0, "target CPU does not support THUMB instructions");
2791 target_flags &= ~MASK_THUMB;
2794 if (TARGET_APCS_FRAME && TARGET_THUMB)
2796 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2797 target_flags &= ~MASK_APCS_FRAME;
2800 /* Callee super interworking implies thumb interworking. Adding
2801 this to the flags here simplifies the logic elsewhere. */
2802 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2803 target_flags |= MASK_INTERWORK;
2805 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2806 from here where no function is being compiled currently. */
2807 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2808 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2810 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2811 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2813 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2815 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2816 target_flags |= MASK_APCS_FRAME;
2819 if (TARGET_POKE_FUNCTION_NAME)
2820 target_flags |= MASK_APCS_FRAME;
2822 if (TARGET_APCS_REENT && flag_pic)
2823 error ("-fpic and -mapcs-reent are incompatible");
2825 if (TARGET_APCS_REENT)
2826 warning (0, "APCS reentrant code not supported. Ignored");
2828 /* If this target is normally configured to use APCS frames, warn if they
2829 are turned off and debugging is turned on. */
2830 if (TARGET_ARM
2831 && write_symbols != NO_DEBUG
2832 && !TARGET_APCS_FRAME
2833 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2834 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2836 if (TARGET_APCS_FLOAT)
2837 warning (0, "passing floating point arguments in fp regs not yet supported");
2839 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2840 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2841 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2842 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2843 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2844 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2845 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2846 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2847 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2848 arm_arch6m = arm_arch6 && !arm_arch_notm;
2849 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2850 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2851 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2852 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2853 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2855 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2856 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2857 thumb_code = TARGET_ARM == 0;
2858 thumb1_code = TARGET_THUMB1 != 0;
2859 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2860 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2861 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2862 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2863 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2864 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2865 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2866 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2867 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2868 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2869 if (arm_restrict_it == 2)
2870 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2872 if (!TARGET_THUMB2)
2873 arm_restrict_it = 0;
2875 /* If we are not using the default (ARM mode) section anchor offset
2876 ranges, then set the correct ranges now. */
2877 if (TARGET_THUMB1)
2879 /* Thumb-1 LDR instructions cannot have negative offsets.
2880 Permissible positive offset ranges are 5-bit (for byte loads),
2881 6-bit (for halfword loads), or 7-bit (for word loads).
2882 Empirical results suggest a 7-bit anchor range gives the best
2883 overall code size. */
2884 targetm.min_anchor_offset = 0;
2885 targetm.max_anchor_offset = 127;
2887 else if (TARGET_THUMB2)
2889 /* The minimum is set such that the total size of the block
2890 for a particular anchor is 248 + 1 + 4095 bytes, which is
2891 divisible by eight, ensuring natural spacing of anchors. */
2892 targetm.min_anchor_offset = -248;
2893 targetm.max_anchor_offset = 4095;
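/* (248 + 1 + 4095 = 4344 = 8 * 543, hence the divisibility by eight
   mentioned above.)  */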
2896 /* V5 code we generate is completely interworking capable, so we turn off
2897 TARGET_INTERWORK here to avoid many tests later on. */
2899 /* XXX However, we must pass the right pre-processor defines to CPP
2900 or GLD can get confused. This is a hack. */
2901 if (TARGET_INTERWORK)
2902 arm_cpp_interwork = 1;
2904 if (arm_arch5)
2905 target_flags &= ~MASK_INTERWORK;
2907 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2908 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2910 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2911 error ("iwmmxt abi requires an iwmmxt capable cpu");
2913 if (!global_options_set.x_arm_fpu_index)
2915 const char *target_fpu_name;
2916 bool ok;
2918 #ifdef FPUTYPE_DEFAULT
2919 target_fpu_name = FPUTYPE_DEFAULT;
2920 #else
2921 target_fpu_name = "vfp";
2922 #endif
2924 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2925 CL_TARGET);
2926 gcc_assert (ok);
2929 arm_fpu_desc = &all_fpus[arm_fpu_index];
2931 switch (arm_fpu_desc->model)
2933 case ARM_FP_MODEL_VFP:
2934 arm_fpu_attr = FPU_VFP;
2935 break;
2937 default:
2938 gcc_unreachable();
2941 if (TARGET_AAPCS_BASED)
2943 if (TARGET_CALLER_INTERWORKING)
2944 error ("AAPCS does not support -mcaller-super-interworking");
2945 else
2946 if (TARGET_CALLEE_INTERWORKING)
2947 error ("AAPCS does not support -mcallee-super-interworking");
2950 /* iWMMXt and NEON are incompatible. */
2951 if (TARGET_IWMMXT && TARGET_NEON)
2952 error ("iWMMXt and NEON are incompatible");
2954 /* iWMMXt unsupported under Thumb mode. */
2955 if (TARGET_THUMB && TARGET_IWMMXT)
2956 error ("iWMMXt unsupported under Thumb mode");
2958 /* __fp16 support currently assumes the core has ldrh. */
2959 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2960 sorry ("__fp16 and no ldrh");
2962 /* If soft-float is specified then don't use FPU. */
2963 if (TARGET_SOFT_FLOAT)
2964 arm_fpu_attr = FPU_NONE;
2966 if (TARGET_AAPCS_BASED)
2968 if (arm_abi == ARM_ABI_IWMMXT)
2969 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2970 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2971 && TARGET_HARD_FLOAT
2972 && TARGET_VFP)
2973 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2974 else
2975 arm_pcs_default = ARM_PCS_AAPCS;
2977 else
2979 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2980 sorry ("-mfloat-abi=hard and VFP");
2982 if (arm_abi == ARM_ABI_APCS)
2983 arm_pcs_default = ARM_PCS_APCS;
2984 else
2985 arm_pcs_default = ARM_PCS_ATPCS;
2988 /* For arm2/3 there is no need to do any scheduling if we are doing
2989 software floating-point. */
2990 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2991 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2993 /* Use the cp15 method if it is available. */
2994 if (target_thread_pointer == TP_AUTO)
2996 if (arm_arch6k && !TARGET_THUMB1)
2997 target_thread_pointer = TP_CP15;
2998 else
2999 target_thread_pointer = TP_SOFT;
3002 if (TARGET_HARD_TP && TARGET_THUMB1)
3003 error ("can not use -mtp=cp15 with 16-bit Thumb");
3005 /* Override the default structure alignment for AAPCS ABI. */
3006 if (!global_options_set.x_arm_structure_size_boundary)
3008 if (TARGET_AAPCS_BASED)
3009 arm_structure_size_boundary = 8;
3011 else
3013 if (arm_structure_size_boundary != 8
3014 && arm_structure_size_boundary != 32
3015 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3017 if (ARM_DOUBLEWORD_ALIGN)
3018 warning (0,
3019 "structure size boundary can only be set to 8, 32 or 64");
3020 else
3021 warning (0, "structure size boundary can only be set to 8 or 32");
3022 arm_structure_size_boundary
3023 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3027 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3029 error ("RTP PIC is incompatible with Thumb");
3030 flag_pic = 0;
3033 /* If stack checking is disabled, we can use r10 as the PIC register,
3034 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3035 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3037 if (TARGET_VXWORKS_RTP)
3038 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3039 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3042 if (flag_pic && TARGET_VXWORKS_RTP)
3043 arm_pic_register = 9;
3045 if (arm_pic_register_string != NULL)
3047 int pic_register = decode_reg_name (arm_pic_register_string);
3049 if (!flag_pic)
3050 warning (0, "-mpic-register= is useless without -fpic");
3052 /* Prevent the user from choosing an obviously stupid PIC register. */
3053 else if (pic_register < 0 || call_used_regs[pic_register]
3054 || pic_register == HARD_FRAME_POINTER_REGNUM
3055 || pic_register == STACK_POINTER_REGNUM
3056 || pic_register >= PC_REGNUM
3057 || (TARGET_VXWORKS_RTP
3058 && (unsigned int) pic_register != arm_pic_register))
3059 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3060 else
3061 arm_pic_register = pic_register;
3064 if (TARGET_VXWORKS_RTP
3065 && !global_options_set.x_arm_pic_data_is_text_relative)
3066 arm_pic_data_is_text_relative = 0;
3068 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3069 if (fix_cm3_ldrd == 2)
3071 if (arm_selected_cpu->core == cortexm3)
3072 fix_cm3_ldrd = 1;
3073 else
3074 fix_cm3_ldrd = 0;
3077 /* Enable -munaligned-access by default for
3078 - all ARMv6 architecture-based processors
3079 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3080 - ARMv8 architecture-based processors.
3082 Disable -munaligned-access by default for
3083 - all pre-ARMv6 architecture-based processors
3084 - ARMv6-M architecture-based processors. */
3086 if (unaligned_access == 2)
3088 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3089 unaligned_access = 1;
3090 else
3091 unaligned_access = 0;
3093 else if (unaligned_access == 1
3094 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3096 warning (0, "target CPU does not support unaligned accesses");
3097 unaligned_access = 0;
3100 if (TARGET_THUMB1 && flag_schedule_insns)
3102 /* Don't warn since it's on by default in -O2. */
3103 flag_schedule_insns = 0;
3106 if (optimize_size)
3108 /* If optimizing for size, bump the number of instructions that we
3109 are prepared to conditionally execute (even on a StrongARM). */
3110 max_insns_skipped = 6;
3112 /* For THUMB2, we limit the conditional sequence to one IT block. */
3113 if (TARGET_THUMB2)
3114 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3116 else
3117 max_insns_skipped = current_tune->max_insns_skipped;
3119 /* Hot/Cold partitioning is not currently supported, since we can't
3120 handle literal pool placement in that case. */
3121 if (flag_reorder_blocks_and_partition)
3123 inform (input_location,
3124 "-freorder-blocks-and-partition not supported on this architecture");
3125 flag_reorder_blocks_and_partition = 0;
3126 flag_reorder_blocks = 1;
3129 if (flag_pic)
3130 /* Hoisting PIC address calculations more aggressively provides a small,
3131 but measurable, size reduction for PIC code. Therefore, we decrease
3132 the bar for unrestricted expression hoisting to the cost of PIC address
3133 calculation, which is 2 instructions. */
3134 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3135 global_options.x_param_values,
3136 global_options_set.x_param_values);
3138 /* ARM EABI defaults to strict volatile bitfields. */
3139 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3140 && abi_version_at_least(2))
3141 flag_strict_volatile_bitfields = 1;
3143 /* Enable software prefetching at -O3 for CPUs that have prefetch, when we
3144 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3145 if (flag_prefetch_loop_arrays < 0
3146 && HAVE_prefetch
3147 && optimize >= 3
3148 && current_tune->num_prefetch_slots > 0)
3149 flag_prefetch_loop_arrays = 1;
3151 /* Set up parameters to be used in the prefetching algorithm. Do not override
3152 the defaults unless we are tuning for a core we have researched values for. */
3153 if (current_tune->num_prefetch_slots > 0)
3154 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3155 current_tune->num_prefetch_slots,
3156 global_options.x_param_values,
3157 global_options_set.x_param_values);
3158 if (current_tune->l1_cache_line_size >= 0)
3159 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3160 current_tune->l1_cache_line_size,
3161 global_options.x_param_values,
3162 global_options_set.x_param_values);
3163 if (current_tune->l1_cache_size >= 0)
3164 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3165 current_tune->l1_cache_size,
3166 global_options.x_param_values,
3167 global_options_set.x_param_values);
3169 /* Use Neon rather than the core registers to perform 64-bit
3170 operations. */
3171 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3172 if (use_neon_for_64bits == 1)
3173 prefer_neon_for_64bits = true;
3175 /* Use the alternative scheduling-pressure algorithm by default. */
3176 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3177 global_options.x_param_values,
3178 global_options_set.x_param_values);
3180 /* Look through the ready list and all of the queue for instructions
3181 relevant to the L2 auto-prefetcher. */
3182 int param_sched_autopref_queue_depth;
3183 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3184 param_sched_autopref_queue_depth = -1;
3185 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3186 param_sched_autopref_queue_depth = 0;
3187 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3188 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3189 else
3190 gcc_unreachable ();
3191 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3192 param_sched_autopref_queue_depth,
3193 global_options.x_param_values,
3194 global_options_set.x_param_values);
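/* For example, the Cortex-A15 and Cortex-A57 tunings above select
   ARM_SCHED_AUTOPREF_FULL, so for them the queue-depth parameter becomes
   max_insn_queue_index + 1; tunings using ARM_SCHED_AUTOPREF_OFF get -1
   instead.  */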
3196 /* Disable shrink-wrap when optimizing function for size, since it tends to
3197 generate additional returns. */
3198 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3199 flag_shrink_wrap = false;
3200 /* TBD: Dwarf info for apcs frame is not handled yet. */
3201 if (TARGET_APCS_FRAME)
3202 flag_shrink_wrap = false;
3204 /* We only support -mslow-flash-data on armv7-m targets. */
3205 if (target_slow_flash_data
3206 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3207 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3208 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3210 /* Currently, for slow flash data, we just disable literal pools. */
3211 if (target_slow_flash_data)
3212 arm_disable_literal_pool = true;
3214 /* Thumb2 inline assembly code should always use unified syntax.
3215 This will apply to ARM and Thumb1 eventually. */
3216 if (TARGET_THUMB2)
3217 inline_asm_unified = 1;
3219 /* Disable scheduling fusion by default unless the processor is ARMv7
3220 and prefers LDRD/STRD. */
3221 if (flag_schedule_fusion == 2
3222 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3223 flag_schedule_fusion = 0;
3225 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3226 - epilogue_insns - does not accurately model the corresponding insns
3227 emitted in the asm file. In particular, see the comment in thumb_exit
3228 'Find out how many of the (return) argument registers we can corrupt'.
3229 As a consequence, the epilogue may clobber registers without fipa-ra
3230 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3231 TODO: Accurately model clobbers for epilogue_insns and reenable
3232 fipa-ra. */
3233 if (TARGET_THUMB1)
3234 flag_ipa_ra = 0;
3236 /* Register global variables with the garbage collector. */
3237 arm_add_gc_roots ();
3240 static void
3241 arm_add_gc_roots (void)
3243 gcc_obstack_init(&minipool_obstack);
3244 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3247 /* A table of known ARM exception types.
3248 For use with the interrupt function attribute. */
3250 typedef struct
3252 const char *const arg;
3253 const unsigned long return_value;
3255 isr_attribute_arg;
3257 static const isr_attribute_arg isr_attribute_args [] =
3259 { "IRQ", ARM_FT_ISR },
3260 { "irq", ARM_FT_ISR },
3261 { "FIQ", ARM_FT_FIQ },
3262 { "fiq", ARM_FT_FIQ },
3263 { "ABORT", ARM_FT_ISR },
3264 { "abort", ARM_FT_ISR },
3265 { "ABORT", ARM_FT_ISR },
3266 { "abort", ARM_FT_ISR },
3267 { "UNDEF", ARM_FT_EXCEPTION },
3268 { "undef", ARM_FT_EXCEPTION },
3269 { "SWI", ARM_FT_EXCEPTION },
3270 { "swi", ARM_FT_EXCEPTION },
3271 { NULL, ARM_FT_NORMAL }
3274 /* Returns the interrupt function type encoded by ARGUMENT (the argument of an
3275 "isr" or "interrupt" attribute), or ARM_FT_UNKNOWN if the type cannot be determined. */
3277 static unsigned long
3278 arm_isr_value (tree argument)
3280 const isr_attribute_arg * ptr;
3281 const char * arg;
3283 if (!arm_arch_notm)
3284 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3286 /* No argument - default to IRQ. */
3287 if (argument == NULL_TREE)
3288 return ARM_FT_ISR;
3290 /* Get the value of the argument. */
3291 if (TREE_VALUE (argument) == NULL_TREE
3292 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3293 return ARM_FT_UNKNOWN;
3295 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3297 /* Check it against the list of known arguments. */
3298 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3299 if (streq (arg, ptr->arg))
3300 return ptr->return_value;
3302 /* An unrecognized interrupt type. */
3303 return ARM_FT_UNKNOWN;
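/* Illustrative usage (a hypothetical declaration, not taken from this
   file): a handler written as
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
   reaches this function with an argument list whose value is the string
   "FIQ" and maps to ARM_FT_FIQ via the table above, while a bare
   __attribute__ ((interrupt)) has no argument and defaults to ARM_FT_ISR.  */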
3306 /* Computes the type of the current function. */
3308 static unsigned long
3309 arm_compute_func_type (void)
3311 unsigned long type = ARM_FT_UNKNOWN;
3312 tree a;
3313 tree attr;
3315 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3317 /* Decide if the current function is volatile. Such functions
3318 never return, and many memory cycles can be saved by not storing
3319 register values that will never be needed again. This optimization
3320 was added to speed up context switching in a kernel application. */
3321 if (optimize > 0
3322 && (TREE_NOTHROW (current_function_decl)
3323 || !(flag_unwind_tables
3324 || (flag_exceptions
3325 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3326 && TREE_THIS_VOLATILE (current_function_decl))
3327 type |= ARM_FT_VOLATILE;
3329 if (cfun->static_chain_decl != NULL)
3330 type |= ARM_FT_NESTED;
3332 attr = DECL_ATTRIBUTES (current_function_decl);
3334 a = lookup_attribute ("naked", attr);
3335 if (a != NULL_TREE)
3336 type |= ARM_FT_NAKED;
3338 a = lookup_attribute ("isr", attr);
3339 if (a == NULL_TREE)
3340 a = lookup_attribute ("interrupt", attr);
3342 if (a == NULL_TREE)
3343 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3344 else
3345 type |= arm_isr_value (TREE_VALUE (a));
3347 return type;
3350 /* Returns the type of the current function. */
3352 unsigned long
3353 arm_current_func_type (void)
3355 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3356 cfun->machine->func_type = arm_compute_func_type ();
3358 return cfun->machine->func_type;
3361 bool
3362 arm_allocate_stack_slots_for_args (void)
3364 /* Naked functions should not allocate stack slots for arguments. */
3365 return !IS_NAKED (arm_current_func_type ());
3368 static bool
3369 arm_warn_func_return (tree decl)
3371 /* Naked functions are implemented entirely in assembly, including the
3372 return sequence, so suppress warnings about this. */
3373 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
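/* For example, a routine declared as

     void start (void) __attribute__ ((naked));

   must supply its own prologue, epilogue and return sequence (typically
   in inline asm), which is why the two hooks above exempt naked
   functions from argument stack slots and from missing-return
   warnings.  */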
3377 /* Output assembler code for a block containing the constant parts
3378 of a trampoline, leaving space for the variable parts.
3380 On the ARM, (if r8 is the static chain regnum, and remembering that
3381 referencing pc adds an offset of 8) the trampoline looks like:
3382 ldr r8, [pc, #0]
3383 ldr pc, [pc]
3384 .word static chain value
3385 .word function's address
3386 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3388 static void
3389 arm_asm_trampoline_template (FILE *f)
3391 if (TARGET_ARM)
3393 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3394 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3396 else if (TARGET_THUMB2)
3398 /* The Thumb-2 trampoline is similar to the ARM implementation.
3399 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3400 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3401 STATIC_CHAIN_REGNUM, PC_REGNUM);
3402 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3404 else
3406 ASM_OUTPUT_ALIGN (f, 2);
3407 fprintf (f, "\t.code\t16\n");
3408 fprintf (f, ".Ltrampoline_start:\n");
3409 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3410 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3411 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3412 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3413 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3414 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3416 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3417 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3420 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3422 static void
3423 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3425 rtx fnaddr, mem, a_tramp;
3427 emit_block_move (m_tramp, assemble_trampoline_template (),
3428 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3430 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3431 emit_move_insn (mem, chain_value);
3433 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3434 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3435 emit_move_insn (mem, fnaddr);
3437 a_tramp = XEXP (m_tramp, 0);
3438 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3439 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3440 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
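/* Illustrative layout of the initialized 32-bit (ARM/Thumb-2) trampoline,
   matching the offsets used by the emit_move_insn calls above:

     offset  0:  ldr  <static chain reg>, [pc, ...]
     offset  4:  ldr  pc, [pc, ...]
     offset  8:  static chain value
     offset 12:  address of the target function

   The 16-bit Thumb template is longer, so its two data words live at
   offsets 12 and 16 instead.  */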
3443 /* Thumb trampolines should be entered in Thumb mode, so set
3444 the bottom bit of the address. */
3446 static rtx
3447 arm_trampoline_adjust_address (rtx addr)
3449 if (TARGET_THUMB)
3450 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3451 NULL, 0, OPTAB_LIB_WIDEN);
3452 return addr;
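/* E.g. a trampoline placed at 0x20001000 is entered through the address
   0x20001001 on Thumb targets, so that an indirect "bx" to it switches
   to (or stays in) Thumb state.  */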
3455 /* Return 1 if it is possible to return using a single instruction.
3456 If SIBLING is non-null, this is a test for a return before a sibling
3457 call. SIBLING is the call insn, so we can examine its register usage. */
3460 use_return_insn (int iscond, rtx sibling)
3462 int regno;
3463 unsigned int func_type;
3464 unsigned long saved_int_regs;
3465 unsigned HOST_WIDE_INT stack_adjust;
3466 arm_stack_offsets *offsets;
3468 /* Never use a return instruction before reload has run. */
3469 if (!reload_completed)
3470 return 0;
3472 func_type = arm_current_func_type ();
3474 /* Naked, volatile and stack alignment functions need special
3475 consideration. */
3476 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3477 return 0;
3479 /* So do interrupt functions that use the frame pointer and Thumb
3480 interrupt functions. */
3481 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3482 return 0;
3484 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3485 && !optimize_function_for_size_p (cfun))
3486 return 0;
3488 offsets = arm_get_frame_offsets ();
3489 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3491 /* As do variadic functions. */
3492 if (crtl->args.pretend_args_size
3493 || cfun->machine->uses_anonymous_args
3494 /* Or if the function calls __builtin_eh_return () */
3495 || crtl->calls_eh_return
3496 /* Or if the function calls alloca */
3497 || cfun->calls_alloca
3498 /* Or if there is a stack adjustment. However, if the stack pointer
3499 is saved on the stack, we can use a pre-incrementing stack load. */
3500 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3501 && stack_adjust == 4)))
3502 return 0;
3504 saved_int_regs = offsets->saved_regs_mask;
3506 /* Unfortunately, the insn
3508 ldmib sp, {..., sp, ...}
3510 triggers a bug on most SA-110 based devices, such that the stack
3511 pointer won't be correctly restored if the instruction takes a
3512 page fault. We work around this problem by popping r3 along with
3513 the other registers, since that is never slower than executing
3514 another instruction.
3516 We test for !arm_arch5 here, because code for any architecture
3517 less than this could potentially be run on one of the buggy
3518 chips. */
3519 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3521 /* Validate that r3 is a call-clobbered register (always true in
3522 the default abi) ... */
3523 if (!call_used_regs[3])
3524 return 0;
3526 /* ... that it isn't being used for a return value ... */
3527 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3528 return 0;
3530 /* ... or for a tail-call argument ... */
3531 if (sibling)
3533 gcc_assert (CALL_P (sibling));
3535 if (find_regno_fusage (sibling, USE, 3))
3536 return 0;
3539 /* ... and that there are no call-saved registers in r0-r2
3540 (always true in the default ABI). */
3541 if (saved_int_regs & 0x7)
3542 return 0;
3545 /* Can't be done if interworking with Thumb, and any registers have been
3546 stacked. */
3547 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3548 return 0;
3550 /* On StrongARM, conditional returns are expensive if they aren't
3551 taken and multiple registers have been stacked. */
3552 if (iscond && arm_tune_strongarm)
3554 /* Conditional return when just the LR is stored is a simple
3555 conditional-load instruction, that's not expensive. */
3556 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3557 return 0;
3559 if (flag_pic
3560 && arm_pic_register != INVALID_REGNUM
3561 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3562 return 0;
3565 /* If there are saved registers but the LR isn't saved, then we need
3566 two instructions for the return. */
3567 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3568 return 0;
3570 /* Can't be done if any of the VFP regs are pushed,
3571 since this also requires an insn. */
3572 if (TARGET_HARD_FLOAT && TARGET_VFP)
3573 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3574 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3575 return 0;
3577 if (TARGET_REALLY_IWMMXT)
3578 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3579 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3580 return 0;
3582 return 1;
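/* As a concrete example, a simple leaf function that saves no registers
   and needs no stack adjustment passes all of the tests above and can
   return with a single instruction such as "bx lr"; a function that
   saves r4 but not lr, on the other hand, needs a separate restore
   before the return, so 0 is returned for it.  */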
3585 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3586 shrink-wrapping if possible. This is the case if we need to emit a
3587 prologue, which we can test by looking at the offsets. */
3588 bool
3589 use_simple_return_p (void)
3591 arm_stack_offsets *offsets;
3593 offsets = arm_get_frame_offsets ();
3594 return offsets->outgoing_args != 0;
3597 /* Return TRUE if int I is a valid immediate ARM constant. */
3600 const_ok_for_arm (HOST_WIDE_INT i)
3602 int lowbit;
3604 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3605 be all zero, or all one. */
3606 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3607 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3608 != ((~(unsigned HOST_WIDE_INT) 0)
3609 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3610 return FALSE;
3612 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3614 /* Fast return for 0 and small values. We must do this for zero, since
3615 the code below can't handle that one case. */
3616 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3617 return TRUE;
3619 /* Get the number of trailing zeros. */
3620 lowbit = ffs((int) i) - 1;
3622 /* Only even shifts are allowed in ARM mode so round down to the
3623 nearest even number. */
3624 if (TARGET_ARM)
3625 lowbit &= ~1;
3627 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3628 return TRUE;
3630 if (TARGET_ARM)
3632 /* Allow rotated constants in ARM mode. */
3633 if (lowbit <= 4
3634 && ((i & ~0xc000003f) == 0
3635 || (i & ~0xf000000f) == 0
3636 || (i & ~0xfc000003) == 0))
3637 return TRUE;
3639 else
3641 HOST_WIDE_INT v;
3643 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3644 v = i & 0xff;
3645 v |= v << 16;
3646 if (i == v || i == (v | (v << 8)))
3647 return TRUE;
3649 /* Allow repeated pattern 0xXY00XY00. */
3650 v = i & 0xff00;
3651 v |= v << 16;
3652 if (i == v)
3653 return TRUE;
3656 return FALSE;
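/* Worked examples (ARM mode): 0x000000FF and 0xFF000000 are valid
   immediates (an 8-bit value rotated right by 0 and by 8 bits
   respectively), and 0x00000104 is valid (0x41 shifted left by 2), but
   0x00000101 is not, because its set bits span nine bit positions.
   In Thumb-2 mode, 0x00FF00FF and 0x01010101 are additionally accepted
   as replicated byte patterns.  */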
3659 /* Return true if I is a valid constant for the operation CODE. */
3661 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3663 if (const_ok_for_arm (i))
3664 return 1;
3666 switch (code)
3668 case SET:
3669 /* See if we can use movw. */
3670 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3671 return 1;
3672 else
3673 /* Otherwise, try mvn. */
3674 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3676 case PLUS:
3677 /* See if we can use addw or subw. */
3678 if (TARGET_THUMB2
3679 && ((i & 0xfffff000) == 0
3680 || ((-i) & 0xfffff000) == 0))
3681 return 1;
3682 /* else fall through. */
3684 case COMPARE:
3685 case EQ:
3686 case NE:
3687 case GT:
3688 case LE:
3689 case LT:
3690 case GE:
3691 case GEU:
3692 case LTU:
3693 case GTU:
3694 case LEU:
3695 case UNORDERED:
3696 case ORDERED:
3697 case UNEQ:
3698 case UNGE:
3699 case UNLT:
3700 case UNGT:
3701 case UNLE:
3702 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3704 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3705 case XOR:
3706 return 0;
3708 case IOR:
3709 if (TARGET_THUMB2)
3710 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3711 return 0;
3713 case AND:
3714 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3716 default:
3717 gcc_unreachable ();
3721 /* Return true if I is a valid di mode constant for the operation CODE. */
3723 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3725 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3726 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3727 rtx hi = GEN_INT (hi_val);
3728 rtx lo = GEN_INT (lo_val);
3730 if (TARGET_THUMB1)
3731 return 0;
3733 switch (code)
3735 case AND:
3736 case IOR:
3737 case XOR:
3738 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3739 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3740 case PLUS:
3741 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3743 default:
3744 return 0;
3748 /* Emit a sequence of insns to handle a large constant.
3749 CODE is the code of the operation required, it can be any of SET, PLUS,
3750 IOR, AND, XOR, MINUS;
3751 MODE is the mode in which the operation is being performed;
3752 VAL is the integer to operate on;
3753 SOURCE is the other operand (a register, or a null-pointer for SET);
3754 SUBTARGETS means it is safe to create scratch registers if that will
3755 either produce a simpler sequence, or we will want to cse the values.
3756 Return value is the number of insns emitted. */
3758 /* ??? Tweak this for thumb2. */
3760 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3761 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3763 rtx cond;
3765 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3766 cond = COND_EXEC_TEST (PATTERN (insn));
3767 else
3768 cond = NULL_RTX;
3770 if (subtargets || code == SET
3771 || (REG_P (target) && REG_P (source)
3772 && REGNO (target) != REGNO (source)))
3774 /* After arm_reorg has been called, we can't fix up expensive
3775 constants by pushing them into memory so we must synthesize
3776 them in-line, regardless of the cost. This is only likely to
3777 be more costly on chips that have load delay slots and we are
3778 compiling without running the scheduler (so no splitting
3779 occurred before the final instruction emission).
3781 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3783 if (!cfun->machine->after_arm_reorg
3784 && !cond
3785 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3786 1, 0)
3787 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3788 + (code != SET))))
3790 if (code == SET)
3792 /* Currently SET is the only monadic value for CODE; all
3793 the rest are dyadic. */
3794 if (TARGET_USE_MOVT)
3795 arm_emit_movpair (target, GEN_INT (val));
3796 else
3797 emit_set_insn (target, GEN_INT (val));
3799 return 1;
3801 else
3803 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3805 if (TARGET_USE_MOVT)
3806 arm_emit_movpair (temp, GEN_INT (val));
3807 else
3808 emit_set_insn (temp, GEN_INT (val));
3810 /* For MINUS, the value is subtracted from, since we never
3811 have subtraction of a constant. */
3812 if (code == MINUS)
3813 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3814 else
3815 emit_set_insn (target,
3816 gen_rtx_fmt_ee (code, mode, source, temp));
3817 return 2;
3822 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3826 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3827 ARM/Thumb-2 immediates and add up to VAL.
3828 The function's return value gives the number of insns required. */
3829 static int
3830 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3831 struct four_ints *return_sequence)
3833 int best_consecutive_zeros = 0;
3834 int i;
3835 int best_start = 0;
3836 int insns1, insns2;
3837 struct four_ints tmp_sequence;
3839 /* If we aren't targeting ARM, the best place to start is always at
3840 the bottom, otherwise look more closely. */
3841 if (TARGET_ARM)
3843 for (i = 0; i < 32; i += 2)
3845 int consecutive_zeros = 0;
3847 if (!(val & (3 << i)))
3849 while ((i < 32) && !(val & (3 << i)))
3851 consecutive_zeros += 2;
3852 i += 2;
3854 if (consecutive_zeros > best_consecutive_zeros)
3856 best_consecutive_zeros = consecutive_zeros;
3857 best_start = i - consecutive_zeros;
3859 i -= 2;
3864 /* So long as it won't require any more insns to do so, it's
3865 desirable to emit a small constant (in bits 0...9) in the last
3866 insn. This way there is more chance that it can be combined with
3867 a later addressing insn to form a pre-indexed load or store
3868 operation. Consider:
3870 *((volatile int *)0xe0000100) = 1;
3871 *((volatile int *)0xe0000110) = 2;
3873 We want this to wind up as:
3875 mov rA, #0xe0000000
3876 mov rB, #1
3877 str rB, [rA, #0x100]
3878 mov rB, #2
3879 str rB, [rA, #0x110]
3881 rather than having to synthesize both large constants from scratch.
3883 Therefore, we calculate how many insns would be required to emit
3884 the constant starting from `best_start', and also starting from
3885 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3886 yield a shorter sequence, we may as well use zero. */
3887 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3888 if (best_start != 0
3889 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3891 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3892 if (insns2 <= insns1)
3894 *return_sequence = tmp_sequence;
3895 insns1 = insns2;
3899 return insns1;
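/* For instance, for the value 0xE0000100 used in the example above this
   returns 2, with RETURN_SEQUENCE set to { 0xE0000000, 0x00000100 }:
   both words are valid immediates on their own, and the small constant
   comes last so that the final add has a chance of being folded into a
   pre-indexed address, as described above.  */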
3902 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3903 static int
3904 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3905 struct four_ints *return_sequence, int i)
3907 int remainder = val & 0xffffffff;
3908 int insns = 0;
3910 /* Try and find a way of doing the job in either two or three
3911 instructions.
3913 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3914 location. We start at position I. This may be the MSB, or
3915 optimal_immediate_sequence may have positioned it at the largest block
3916 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3917 wrapping around to the top of the word when we drop off the bottom.
3918 In the worst case this code should produce no more than four insns.
3920 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3921 constants, shifted to any arbitrary location. We should always start
3922 at the MSB. */
3925 int end;
3926 unsigned int b1, b2, b3, b4;
3927 unsigned HOST_WIDE_INT result;
3928 int loc;
3930 gcc_assert (insns < 4);
3932 if (i <= 0)
3933 i += 32;
3935 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3936 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3938 loc = i;
3939 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3940 /* We can use addw/subw for the last 12 bits. */
3941 result = remainder;
3942 else
3944 /* Use an 8-bit shifted/rotated immediate. */
3945 end = i - 8;
3946 if (end < 0)
3947 end += 32;
3948 result = remainder & ((0x0ff << end)
3949 | ((i < end) ? (0xff >> (32 - end))
3950 : 0));
3951 i -= 8;
3954 else
3956 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3957 arbitrary shifts. */
3958 i -= TARGET_ARM ? 2 : 1;
3959 continue;
3962 /* Next, see if we can do a better job with a thumb2 replicated
3963 constant.
3965 We do it this way around to catch the cases like 0x01F001E0 where
3966 two 8-bit immediates would work, but a replicated constant would
3967 make it worse.
3969 TODO: 16-bit constants that don't clear all the bits, but still win.
3970 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3971 if (TARGET_THUMB2)
3973 b1 = (remainder & 0xff000000) >> 24;
3974 b2 = (remainder & 0x00ff0000) >> 16;
3975 b3 = (remainder & 0x0000ff00) >> 8;
3976 b4 = remainder & 0xff;
3978 if (loc > 24)
3980 /* The 8-bit immediate already found clears b1 (and maybe b2),
3981 but must leave b3 and b4 alone. */
3983 /* First try to find a 32-bit replicated constant that clears
3984 almost everything. We can assume that we can't do it in one,
3985 or else we wouldn't be here. */
3986 unsigned int tmp = b1 & b2 & b3 & b4;
3987 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3988 + (tmp << 24);
3989 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3990 + (tmp == b3) + (tmp == b4);
3991 if (tmp
3992 && (matching_bytes >= 3
3993 || (matching_bytes == 2
3994 && const_ok_for_op (remainder & ~tmp2, code))))
3996 /* At least 3 of the bytes match, and the fourth has at
3997 least as many bits set, or two of the bytes match
3998 and it will only require one more insn to finish. */
3999 result = tmp2;
4000 i = tmp != b1 ? 32
4001 : tmp != b2 ? 24
4002 : tmp != b3 ? 16
4003 : 8;
4006 /* Second, try to find a 16-bit replicated constant that can
4007 leave three of the bytes clear. If b2 or b4 is already
4008 zero, then we can. If the 8-bit from above would not
4009 clear b2 anyway, then we still win. */
4010 else if (b1 == b3 && (!b2 || !b4
4011 || (remainder & 0x00ff0000 & ~result)))
4013 result = remainder & 0xff00ff00;
4014 i = 24;
4017 else if (loc > 16)
4019 /* The 8-bit immediate already found clears b2 (and maybe b3)
4020 and we don't get here unless b1 is already clear, but it will
4021 leave b4 unchanged. */
4023 /* If we can clear b2 and b4 at once, then we win, since the
4024 8-bits couldn't possibly reach that far. */
4025 if (b2 == b4)
4027 result = remainder & 0x00ff00ff;
4028 i = 16;
4033 return_sequence->i[insns++] = result;
4034 remainder &= ~result;
4036 if (code == SET || code == MINUS)
4037 code = PLUS;
4039 while (remainder);
4041 return insns;
4044 /* Emit an instruction with the indicated PATTERN. If COND is
4045 non-NULL, conditionalize the execution of the instruction on COND
4046 being true. */
4048 static void
4049 emit_constant_insn (rtx cond, rtx pattern)
4051 if (cond)
4052 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4053 emit_insn (pattern);
4056 /* As above, but extra parameter GENERATE which, if clear, suppresses
4057 RTL generation. */
4059 static int
4060 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4061 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4062 int generate)
4064 int can_invert = 0;
4065 int can_negate = 0;
4066 int final_invert = 0;
4067 int i;
4068 int set_sign_bit_copies = 0;
4069 int clear_sign_bit_copies = 0;
4070 int clear_zero_bit_copies = 0;
4071 int set_zero_bit_copies = 0;
4072 int insns = 0, neg_insns, inv_insns;
4073 unsigned HOST_WIDE_INT temp1, temp2;
4074 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4075 struct four_ints *immediates;
4076 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4078 /* Find out which operations are safe for a given CODE. Also do a quick
4079 check for degenerate cases; these can occur when DImode operations
4080 are split. */
4081 switch (code)
4083 case SET:
4084 can_invert = 1;
4085 break;
4087 case PLUS:
4088 can_negate = 1;
4089 break;
4091 case IOR:
4092 if (remainder == 0xffffffff)
4094 if (generate)
4095 emit_constant_insn (cond,
4096 gen_rtx_SET (VOIDmode, target,
4097 GEN_INT (ARM_SIGN_EXTEND (val))));
4098 return 1;
4101 if (remainder == 0)
4103 if (reload_completed && rtx_equal_p (target, source))
4104 return 0;
4106 if (generate)
4107 emit_constant_insn (cond,
4108 gen_rtx_SET (VOIDmode, target, source));
4109 return 1;
4111 break;
4113 case AND:
4114 if (remainder == 0)
4116 if (generate)
4117 emit_constant_insn (cond,
4118 gen_rtx_SET (VOIDmode, target, const0_rtx));
4119 return 1;
4121 if (remainder == 0xffffffff)
4123 if (reload_completed && rtx_equal_p (target, source))
4124 return 0;
4125 if (generate)
4126 emit_constant_insn (cond,
4127 gen_rtx_SET (VOIDmode, target, source));
4128 return 1;
4130 can_invert = 1;
4131 break;
4133 case XOR:
4134 if (remainder == 0)
4136 if (reload_completed && rtx_equal_p (target, source))
4137 return 0;
4138 if (generate)
4139 emit_constant_insn (cond,
4140 gen_rtx_SET (VOIDmode, target, source));
4141 return 1;
4144 if (remainder == 0xffffffff)
4146 if (generate)
4147 emit_constant_insn (cond,
4148 gen_rtx_SET (VOIDmode, target,
4149 gen_rtx_NOT (mode, source)));
4150 return 1;
4152 final_invert = 1;
4153 break;
4155 case MINUS:
4156 /* We treat MINUS as (val - source), since (source - val) is always
4157 passed as (source + (-val)). */
4158 if (remainder == 0)
4160 if (generate)
4161 emit_constant_insn (cond,
4162 gen_rtx_SET (VOIDmode, target,
4163 gen_rtx_NEG (mode, source)));
4164 return 1;
4166 if (const_ok_for_arm (val))
4168 if (generate)
4169 emit_constant_insn (cond,
4170 gen_rtx_SET (VOIDmode, target,
4171 gen_rtx_MINUS (mode, GEN_INT (val),
4172 source)));
4173 return 1;
4176 break;
4178 default:
4179 gcc_unreachable ();
4182 /* If we can do it in one insn get out quickly. */
4183 if (const_ok_for_op (val, code))
4185 if (generate)
4186 emit_constant_insn (cond,
4187 gen_rtx_SET (VOIDmode, target,
4188 (source
4189 ? gen_rtx_fmt_ee (code, mode, source,
4190 GEN_INT (val))
4191 : GEN_INT (val))));
4192 return 1;
4195 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4196 insn. */
4197 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4198 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4200 if (generate)
4202 if (mode == SImode && i == 16)
4203 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4204 smaller insn. */
4205 emit_constant_insn (cond,
4206 gen_zero_extendhisi2
4207 (target, gen_lowpart (HImode, source)));
4208 else
4209 /* Extz only supports SImode, but we can coerce the operands
4210 into that mode. */
4211 emit_constant_insn (cond,
4212 gen_extzv_t2 (gen_lowpart (SImode, target),
4213 gen_lowpart (SImode, source),
4214 GEN_INT (i), const0_rtx));
4217 return 1;
4220 /* Calculate a few attributes that may be useful for specific
4221 optimizations. */
4222 /* Count number of leading zeros. */
4223 for (i = 31; i >= 0; i--)
4225 if ((remainder & (1 << i)) == 0)
4226 clear_sign_bit_copies++;
4227 else
4228 break;
4231 /* Count number of leading 1's. */
4232 for (i = 31; i >= 0; i--)
4234 if ((remainder & (1 << i)) != 0)
4235 set_sign_bit_copies++;
4236 else
4237 break;
4240 /* Count number of trailing zeros. */
4241 for (i = 0; i <= 31; i++)
4243 if ((remainder & (1 << i)) == 0)
4244 clear_zero_bit_copies++;
4245 else
4246 break;
4249 /* Count number of trailing 1's. */
4250 for (i = 0; i <= 31; i++)
4252 if ((remainder & (1 << i)) != 0)
4253 set_zero_bit_copies++;
4254 else
4255 break;
4258 switch (code)
4260 case SET:
4261 /* See if we can do this by sign_extending a constant that is known
4262 to be negative. This is a good way of doing it, since the shift
4263 may well merge into a subsequent insn. */
4264 if (set_sign_bit_copies > 1)
4266 if (const_ok_for_arm
4267 (temp1 = ARM_SIGN_EXTEND (remainder
4268 << (set_sign_bit_copies - 1))))
4270 if (generate)
4272 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4273 emit_constant_insn (cond,
4274 gen_rtx_SET (VOIDmode, new_src,
4275 GEN_INT (temp1)));
4276 emit_constant_insn (cond,
4277 gen_ashrsi3 (target, new_src,
4278 GEN_INT (set_sign_bit_copies - 1)));
4280 return 2;
4282 /* For an inverted constant, we will need to set the low bits,
4283 these will be shifted out of harm's way. */
4284 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4285 if (const_ok_for_arm (~temp1))
4287 if (generate)
4289 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4290 emit_constant_insn (cond,
4291 gen_rtx_SET (VOIDmode, new_src,
4292 GEN_INT (temp1)));
4293 emit_constant_insn (cond,
4294 gen_ashrsi3 (target, new_src,
4295 GEN_INT (set_sign_bit_copies - 1)));
4297 return 2;
4301 /* See if we can calculate the value as the difference between two
4302 valid immediates. */
4303 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4305 int topshift = clear_sign_bit_copies & ~1;
4307 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4308 & (0xff000000 >> topshift));
4310 /* If temp1 is zero, then that means the 9 most significant
4311 bits of remainder were 1 and we've caused it to overflow.
4312 When topshift is 0 we don't need to do anything since we
4313 can borrow from 'bit 32'. */
4314 if (temp1 == 0 && topshift != 0)
4315 temp1 = 0x80000000 >> (topshift - 1);
4317 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4319 if (const_ok_for_arm (temp2))
4321 if (generate)
4323 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4324 emit_constant_insn (cond,
4325 gen_rtx_SET (VOIDmode, new_src,
4326 GEN_INT (temp1)));
4327 emit_constant_insn (cond,
4328 gen_addsi3 (target, new_src,
4329 GEN_INT (-temp2)));
4332 return 2;
4336 /* See if we can generate this by setting the bottom (or the top)
4337 16 bits, and then shifting these into the other half of the
4338 word. We only look for the simplest cases, to do more would cost
4339 too much. Be careful, however, not to generate this when the
4340 alternative would take fewer insns. */
4341 if (val & 0xffff0000)
4343 temp1 = remainder & 0xffff0000;
4344 temp2 = remainder & 0x0000ffff;
4346 /* Overlaps outside this range are best done using other methods. */
4347 for (i = 9; i < 24; i++)
4349 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4350 && !const_ok_for_arm (temp2))
4352 rtx new_src = (subtargets
4353 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4354 : target);
4355 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4356 source, subtargets, generate);
4357 source = new_src;
4358 if (generate)
4359 emit_constant_insn
4360 (cond,
4361 gen_rtx_SET
4362 (VOIDmode, target,
4363 gen_rtx_IOR (mode,
4364 gen_rtx_ASHIFT (mode, source,
4365 GEN_INT (i)),
4366 source)));
4367 return insns + 1;
4371 /* Don't duplicate cases already considered. */
4372 for (i = 17; i < 24; i++)
4374 if (((temp1 | (temp1 >> i)) == remainder)
4375 && !const_ok_for_arm (temp1))
4377 rtx new_src = (subtargets
4378 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4379 : target);
4380 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4381 source, subtargets, generate);
4382 source = new_src;
4383 if (generate)
4384 emit_constant_insn
4385 (cond,
4386 gen_rtx_SET (VOIDmode, target,
4387 gen_rtx_IOR
4388 (mode,
4389 gen_rtx_LSHIFTRT (mode, source,
4390 GEN_INT (i)),
4391 source)));
4392 return insns + 1;
4396 break;
4398 case IOR:
4399 case XOR:
4400 /* If we have IOR or XOR, and the constant can be loaded in a
4401 single instruction, and we can find a temporary to put it in,
4402 then this can be done in two instructions instead of 3-4. */
4403 if (subtargets
4404 /* TARGET can't be NULL if SUBTARGETS is 0 */
4405 || (reload_completed && !reg_mentioned_p (target, source)))
4407 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4409 if (generate)
4411 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4413 emit_constant_insn (cond,
4414 gen_rtx_SET (VOIDmode, sub,
4415 GEN_INT (val)));
4416 emit_constant_insn (cond,
4417 gen_rtx_SET (VOIDmode, target,
4418 gen_rtx_fmt_ee (code, mode,
4419 source, sub)));
4421 return 2;
4425 if (code == XOR)
4426 break;
4428 /* Convert
4429 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4430 followed by 0s, e.g. 0xfff00000) into
4431 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4433 This can be done in 2 instructions by using shifts with mov or mvn.
4434 E.g. for
4435 x = x | 0xfff00000;
4436 we generate:
4437 mvn r0, r0, asl #12
4438 mvn r0, r0, lsr #12 */
4439 if (set_sign_bit_copies > 8
4440 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4442 if (generate)
4444 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4445 rtx shift = GEN_INT (set_sign_bit_copies);
4447 emit_constant_insn
4448 (cond,
4449 gen_rtx_SET (VOIDmode, sub,
4450 gen_rtx_NOT (mode,
4451 gen_rtx_ASHIFT (mode,
4452 source,
4453 shift))));
4454 emit_constant_insn
4455 (cond,
4456 gen_rtx_SET (VOIDmode, target,
4457 gen_rtx_NOT (mode,
4458 gen_rtx_LSHIFTRT (mode, sub,
4459 shift))));
4461 return 2;
4464 /* Convert
4465 x = y | constant (which has set_zero_bit_copies trailing ones) into
4467 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4469 E.g. for r0 = r0 | 0xfff we generate:
4470 mvn r0, r0, lsr #12
4471 mvn r0, r0, asl #12 */
4474 if (set_zero_bit_copies > 8
4475 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4477 if (generate)
4479 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (set_zero_bit_copies);
4482 emit_constant_insn
4483 (cond,
4484 gen_rtx_SET (VOIDmode, sub,
4485 gen_rtx_NOT (mode,
4486 gen_rtx_LSHIFTRT (mode,
4487 source,
4488 shift))));
4489 emit_constant_insn
4490 (cond,
4491 gen_rtx_SET (VOIDmode, target,
4492 gen_rtx_NOT (mode,
4493 gen_rtx_ASHIFT (mode, sub,
4494 shift))));
4496 return 2;
4499 /* This will never be reached for Thumb2 because orn is a valid
4500 instruction. This is for Thumb1 and the ARM 32-bit cases.
4502 x = y | constant (such that ~constant is a valid constant)
4503 Transform this to
4504 x = ~(~y & ~constant). */
4506 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4508 if (generate)
4510 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4511 emit_constant_insn (cond,
4512 gen_rtx_SET (VOIDmode, sub,
4513 gen_rtx_NOT (mode, source)));
4514 source = sub;
4515 if (subtargets)
4516 sub = gen_reg_rtx (mode);
4517 emit_constant_insn (cond,
4518 gen_rtx_SET (VOIDmode, sub,
4519 gen_rtx_AND (mode, source,
4520 GEN_INT (temp1))));
4521 emit_constant_insn (cond,
4522 gen_rtx_SET (VOIDmode, target,
4523 gen_rtx_NOT (mode, sub)));
4525 return 3;
4527 break;
4529 case AND:
4530 /* See if two shifts will do 2 or more insn's worth of work. */
4531 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4533 HOST_WIDE_INT shift_mask = ((0xffffffff
4534 << (32 - clear_sign_bit_copies))
4535 & 0xffffffff);
4537 if ((remainder | shift_mask) != 0xffffffff)
4539 HOST_WIDE_INT new_val
4540 = ARM_SIGN_EXTEND (remainder | shift_mask);
4542 if (generate)
4544 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4545 insns = arm_gen_constant (AND, SImode, cond, new_val,
4546 new_src, source, subtargets, 1);
4547 source = new_src;
4549 else
4551 rtx targ = subtargets ? NULL_RTX : target;
4552 insns = arm_gen_constant (AND, mode, cond, new_val,
4553 targ, source, subtargets, 0);
4557 if (generate)
4559 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4560 rtx shift = GEN_INT (clear_sign_bit_copies);
4562 emit_insn (gen_ashlsi3 (new_src, source, shift));
4563 emit_insn (gen_lshrsi3 (target, new_src, shift));
4566 return insns + 2;
4569 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4571 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4573 if ((remainder | shift_mask) != 0xffffffff)
4575 HOST_WIDE_INT new_val
4576 = ARM_SIGN_EXTEND (remainder | shift_mask);
4577 if (generate)
4579 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4581 insns = arm_gen_constant (AND, mode, cond, new_val,
4582 new_src, source, subtargets, 1);
4583 source = new_src;
4585 else
4587 rtx targ = subtargets ? NULL_RTX : target;
4589 insns = arm_gen_constant (AND, mode, cond, new_val,
4590 targ, source, subtargets, 0);
4594 if (generate)
4596 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4597 rtx shift = GEN_INT (clear_zero_bit_copies);
4599 emit_insn (gen_lshrsi3 (new_src, source, shift));
4600 emit_insn (gen_ashlsi3 (target, new_src, shift));
4603 return insns + 2;
4606 break;
4608 default:
4609 break;
4612 /* Calculate what the instruction sequences would be if we generated it
4613 normally, negated, or inverted. */
4614 if (code == AND)
4615 /* AND cannot be split into multiple insns, so invert and use BIC. */
4616 insns = 99;
4617 else
4618 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4620 if (can_negate)
4621 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4622 &neg_immediates);
4623 else
4624 neg_insns = 99;
4626 if (can_invert || final_invert)
4627 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4628 &inv_immediates);
4629 else
4630 inv_insns = 99;
4632 immediates = &pos_immediates;
4634 /* Is the negated immediate sequence more efficient? */
4635 if (neg_insns < insns && neg_insns <= inv_insns)
4637 insns = neg_insns;
4638 immediates = &neg_immediates;
4640 else
4641 can_negate = 0;
4643 /* Is the inverted immediate sequence more efficient?
4644 We must allow for an extra NOT instruction for XOR operations, although
4645 there is some chance that the final 'mvn' will get optimized later. */
4646 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4648 insns = inv_insns;
4649 immediates = &inv_immediates;
4651 else
4653 can_invert = 0;
4654 final_invert = 0;
4657 /* Now output the chosen sequence as instructions. */
4658 if (generate)
4660 for (i = 0; i < insns; i++)
4662 rtx new_src, temp1_rtx;
4664 temp1 = immediates->i[i];
4666 if (code == SET || code == MINUS)
4667 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4668 else if ((final_invert || i < (insns - 1)) && subtargets)
4669 new_src = gen_reg_rtx (mode);
4670 else
4671 new_src = target;
4673 if (can_invert)
4674 temp1 = ~temp1;
4675 else if (can_negate)
4676 temp1 = -temp1;
4678 temp1 = trunc_int_for_mode (temp1, mode);
4679 temp1_rtx = GEN_INT (temp1);
4681 if (code == SET)
4683 else if (code == MINUS)
4684 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4685 else
4686 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4688 emit_constant_insn (cond,
4689 gen_rtx_SET (VOIDmode, new_src,
4690 temp1_rtx));
4691 source = new_src;
4693 if (code == SET)
4695 can_negate = can_invert;
4696 can_invert = 0;
4697 code = PLUS;
4699 else if (code == MINUS)
4700 code = PLUS;
4704 if (final_invert)
4706 if (generate)
4707 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4708 gen_rtx_NOT (mode, source)));
4709 insns++;
4712 return insns;
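/* An example of the invert selection above: an AND with 0xFFF0FF00 has
   no single-immediate form, so insns is forced to 99 and the inverted
   constant 0x000F00FF is used instead; it splits into the two valid
   immediates 0x000F0000 and 0x000000FF, and the operation is emitted as
   two BIC instructions.  */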
4715 /* Canonicalize a comparison so that we are more likely to recognize it.
4716 This can be done for a few constant compares, where we can make the
4717 immediate value easier to load. */
4719 static void
4720 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4721 bool op0_preserve_value)
4723 machine_mode mode;
4724 unsigned HOST_WIDE_INT i, maxval;
4726 mode = GET_MODE (*op0);
4727 if (mode == VOIDmode)
4728 mode = GET_MODE (*op1);
4730 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4732 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4733 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4734 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4735 for GTU/LEU in Thumb mode. */
4736 if (mode == DImode)
4739 if (*code == GT || *code == LE
4740 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4742 /* Missing comparison. First try to use an available
4743 comparison. */
4744 if (CONST_INT_P (*op1))
4746 i = INTVAL (*op1);
4747 switch (*code)
4749 case GT:
4750 case LE:
4751 if (i != maxval
4752 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4754 *op1 = GEN_INT (i + 1);
4755 *code = *code == GT ? GE : LT;
4756 return;
4758 break;
4759 case GTU:
4760 case LEU:
4761 if (i != ~((unsigned HOST_WIDE_INT) 0)
4762 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4764 *op1 = GEN_INT (i + 1);
4765 *code = *code == GTU ? GEU : LTU;
4766 return;
4768 break;
4769 default:
4770 gcc_unreachable ();
4774 /* If that did not work, reverse the condition. */
4775 if (!op0_preserve_value)
4777 std::swap (*op0, *op1);
4778 *code = (int)swap_condition ((enum rtx_code)*code);
4781 return;
4784 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4785 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4786 to facilitate possible combining with a cmp into 'ands'. */
4787 if (mode == SImode
4788 && GET_CODE (*op0) == ZERO_EXTEND
4789 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4790 && GET_MODE (XEXP (*op0, 0)) == QImode
4791 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4792 && subreg_lowpart_p (XEXP (*op0, 0))
4793 && *op1 == const0_rtx)
4794 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4795 GEN_INT (255));
4797 /* Comparisons smaller than DImode. Only adjust comparisons against
4798 an out-of-range constant. */
4799 if (!CONST_INT_P (*op1)
4800 || const_ok_for_arm (INTVAL (*op1))
4801 || const_ok_for_arm (- INTVAL (*op1)))
4802 return;
4804 i = INTVAL (*op1);
4806 switch (*code)
4808 case EQ:
4809 case NE:
4810 return;
4812 case GT:
4813 case LE:
4814 if (i != maxval
4815 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4817 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4818 *code = *code == GT ? GE : LT;
4819 return;
4821 break;
4823 case GE:
4824 case LT:
4825 if (i != ~maxval
4826 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4828 *op1 = GEN_INT (i - 1);
4829 *code = *code == GE ? GT : LE;
4830 return;
4832 break;
4834 case GTU:
4835 case LEU:
4836 if (i != ~((unsigned HOST_WIDE_INT) 0)
4837 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4839 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4840 *code = *code == GTU ? GEU : LTU;
4841 return;
4843 break;
4845 case GEU:
4846 case LTU:
4847 if (i != 0
4848 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4850 *op1 = GEN_INT (i - 1);
4851 *code = *code == GEU ? GTU : LEU;
4852 return;
4854 break;
4856 default:
4857 gcc_unreachable ();
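/* Example of the adjustment above: the SImode comparison "x > 4095" uses
   the constant 0xFFF, which is not a valid ARM immediate (and neither is
   -0xFFF), but 4096 is, so the comparison is rewritten as "x >= 4096"
   (GT becomes GE with *OP1 = 4096) and can then use a plain CMP.  */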
4862 /* Define how to find the value returned by a function. */
4864 static rtx
4865 arm_function_value(const_tree type, const_tree func,
4866 bool outgoing ATTRIBUTE_UNUSED)
4868 machine_mode mode;
4869 int unsignedp ATTRIBUTE_UNUSED;
4870 rtx r ATTRIBUTE_UNUSED;
4872 mode = TYPE_MODE (type);
4874 if (TARGET_AAPCS_BASED)
4875 return aapcs_allocate_return_reg (mode, type, func);
4877 /* Promote integer types. */
4878 if (INTEGRAL_TYPE_P (type))
4879 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4881 /* Promote small structs returned in a register to full-word size
4882 for big-endian AAPCS. */
4883 if (arm_return_in_msb (type))
4885 HOST_WIDE_INT size = int_size_in_bytes (type);
4886 if (size % UNITS_PER_WORD != 0)
4888 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4889 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4893 return arm_libcall_value_1 (mode);
4896 /* libcall hashtable helpers. */
4898 struct libcall_hasher : typed_noop_remove <rtx_def>
4900 typedef const rtx_def *value_type;
4901 typedef const rtx_def *compare_type;
4902 static inline hashval_t hash (const rtx_def *);
4903 static inline bool equal (const rtx_def *, const rtx_def *);
4904 static inline void remove (rtx_def *);
4907 inline bool
4908 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
4910 return rtx_equal_p (p1, p2);
4913 inline hashval_t
4914 libcall_hasher::hash (const rtx_def *p1)
4916 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4919 typedef hash_table<libcall_hasher> libcall_table_type;
4921 static void
4922 add_libcall (libcall_table_type *htab, rtx libcall)
4924 *htab->find_slot (libcall, INSERT) = libcall;
4927 static bool
4928 arm_libcall_uses_aapcs_base (const_rtx libcall)
4930 static bool init_done = false;
4931 static libcall_table_type *libcall_htab = NULL;
4933 if (!init_done)
4935 init_done = true;
4937 libcall_htab = new libcall_table_type (31);
4938 add_libcall (libcall_htab,
4939 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4940 add_libcall (libcall_htab,
4941 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4942 add_libcall (libcall_htab,
4943 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4944 add_libcall (libcall_htab,
4945 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4947 add_libcall (libcall_htab,
4948 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4949 add_libcall (libcall_htab,
4950 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4951 add_libcall (libcall_htab,
4952 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4953 add_libcall (libcall_htab,
4954 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4956 add_libcall (libcall_htab,
4957 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4958 add_libcall (libcall_htab,
4959 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4960 add_libcall (libcall_htab,
4961 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4962 add_libcall (libcall_htab,
4963 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4964 add_libcall (libcall_htab,
4965 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4966 add_libcall (libcall_htab,
4967 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4968 add_libcall (libcall_htab,
4969 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4970 add_libcall (libcall_htab,
4971 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4973 /* Values from double-precision helper functions are returned in core
4974 registers if the selected core only supports single-precision
4975 arithmetic, even if we are using the hard-float ABI. The same is
4976 true for single-precision helpers, but we will never be using the
4977 hard-float ABI on a CPU which doesn't support single-precision
4978 operations in hardware. */
4979 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4980 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4981 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4982 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4983 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4984 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4985 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4986 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4987 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4988 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4989 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4990 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4991 SFmode));
4992 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4993 DFmode));
4996 return libcall && libcall_htab->find (libcall) != NULL;
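/* Concretely: on an AAPCS hard-float target whose FPU only implements
   single precision, the double-precision addition helper registered for
   add_optab (__aeabi_dadd on EABI targets) is found in this table, so
   its result is taken from the core registers r0/r1 rather than from a
   VFP register.  */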
4999 static rtx
5000 arm_libcall_value_1 (machine_mode mode)
5002 if (TARGET_AAPCS_BASED)
5003 return aapcs_libcall_value (mode);
5004 else if (TARGET_IWMMXT_ABI
5005 && arm_vector_mode_supported_p (mode))
5006 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5007 else
5008 return gen_rtx_REG (mode, ARG_REGISTER (1));
5011 /* Define how to find the value returned by a library function
5012 assuming the value has mode MODE. */
5014 static rtx
5015 arm_libcall_value (machine_mode mode, const_rtx libcall)
5017 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5018 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5020 /* The following libcalls return their result in integer registers,
5021 even though they return a floating point value. */
5022 if (arm_libcall_uses_aapcs_base (libcall))
5023 return gen_rtx_REG (mode, ARG_REGISTER(1));
5027 return arm_libcall_value_1 (mode);
5030 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5032 static bool
5033 arm_function_value_regno_p (const unsigned int regno)
5035 if (regno == ARG_REGISTER (1)
5036 || (TARGET_32BIT
5037 && TARGET_AAPCS_BASED
5038 && TARGET_VFP
5039 && TARGET_HARD_FLOAT
5040 && regno == FIRST_VFP_REGNUM)
5041 || (TARGET_IWMMXT_ABI
5042 && regno == FIRST_IWMMXT_REGNUM))
5043 return true;
5045 return false;
5048 /* Determine the amount of memory needed to store the possible return
5049 registers of an untyped call. */
5051 arm_apply_result_size (void)
5053 int size = 16;
5055 if (TARGET_32BIT)
5057 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5058 size += 32;
5059 if (TARGET_IWMMXT_ABI)
5060 size += 8;
5063 return size;
5066 /* Decide whether TYPE should be returned in memory (true)
5067 or in a register (false). FNTYPE is the type of the function making
5068 the call. */
5069 static bool
5070 arm_return_in_memory (const_tree type, const_tree fntype)
5072 HOST_WIDE_INT size;
5074 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5076 if (TARGET_AAPCS_BASED)
5078 /* Simple, non-aggregate types (i.e. not including vectors and
5079 complex) are always returned in a register (or registers).
5080 We don't care about which register here, so we can short-cut
5081 some of the detail. */
5082 if (!AGGREGATE_TYPE_P (type)
5083 && TREE_CODE (type) != VECTOR_TYPE
5084 && TREE_CODE (type) != COMPLEX_TYPE)
5085 return false;
5087 /* Any return value that is no larger than one word can be
5088 returned in r0. */
5089 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5090 return false;
5092 /* Check any available co-processors to see if they accept the
5093 type as a register candidate (VFP, for example, can return
5094 some aggregates in consecutive registers). These aren't
5095 available if the call is variadic. */
5096 if (aapcs_select_return_coproc (type, fntype) >= 0)
5097 return false;
5099 /* Vector values should be returned using ARM registers, not
5100 memory (unless they're over 16 bytes, which will break since
5101 we only have four call-clobbered registers to play with). */
5102 if (TREE_CODE (type) == VECTOR_TYPE)
5103 return (size < 0 || size > (4 * UNITS_PER_WORD));
5105 /* The rest go in memory. */
5106 return true;
5109 if (TREE_CODE (type) == VECTOR_TYPE)
5110 return (size < 0 || size > (4 * UNITS_PER_WORD));
5112 if (!AGGREGATE_TYPE_P (type) &&
5113 (TREE_CODE (type) != VECTOR_TYPE))
5114 /* All simple types are returned in registers. */
5115 return false;
5117 if (arm_abi != ARM_ABI_APCS)
5119 /* ATPCS and later return aggregate types in memory only if they are
5120 larger than a word (or are variable size). */
5121 return (size < 0 || size > UNITS_PER_WORD);
5124 /* For the arm-wince targets we choose to be compatible with Microsoft's
5125 ARM and Thumb compilers, which always return aggregates in memory. */
5126 #ifndef ARM_WINCE
5127 /* All structures/unions bigger than one word are returned in memory.
5128 Also catch the case where int_size_in_bytes returns -1. In this case
5129 the aggregate is either huge or of variable size, and in either case
5130 we will want to return it via memory and not in a register. */
5131 if (size < 0 || size > UNITS_PER_WORD)
5132 return true;
5134 if (TREE_CODE (type) == RECORD_TYPE)
5136 tree field;
5138 /* For a struct the APCS says that we only return in a register
5139 if the type is 'integer like' and every addressable element
5140 has an offset of zero. For practical purposes this means
5141 that the structure can have at most one non bit-field element
5142 and that this element must be the first one in the structure. */
5144 /* Find the first field, ignoring non FIELD_DECL things which will
5145 have been created by C++. */
5146 for (field = TYPE_FIELDS (type);
5147 field && TREE_CODE (field) != FIELD_DECL;
5148 field = DECL_CHAIN (field))
5149 continue;
5151 if (field == NULL)
5152 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5154 /* Check that the first field is valid for returning in a register. */
5156 /* ... Floats are not allowed */
5157 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5158 return true;
5160 /* ... Aggregates that are not themselves valid for returning in
5161 a register are not allowed. */
5162 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5163 return true;
5165 /* Now check the remaining fields, if any. Only bitfields are allowed,
5166 since they are not addressable. */
5167 for (field = DECL_CHAIN (field);
5168 field;
5169 field = DECL_CHAIN (field))
5171 if (TREE_CODE (field) != FIELD_DECL)
5172 continue;
5174 if (!DECL_BIT_FIELD_TYPE (field))
5175 return true;
5178 return false;
5181 if (TREE_CODE (type) == UNION_TYPE)
5183 tree field;
5185 /* Unions can be returned in registers if every element is
5186 integral, or can be returned in an integer register. */
5187 for (field = TYPE_FIELDS (type);
5188 field;
5189 field = DECL_CHAIN (field))
5191 if (TREE_CODE (field) != FIELD_DECL)
5192 continue;
5194 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5195 return true;
5197 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5198 return true;
5201 return false;
5203 #endif /* not ARM_WINCE */
5205 /* Return all other types in memory. */
5206 return true;
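/* Examples under AAPCS: "struct { char c; }" (one byte) is returned in
   r0; "struct { int a, b, c; }" (12 bytes) is returned in memory through
   an implicit pointer; and with the VFP variant of the PCS a homogeneous
   aggregate such as "struct { float x, y; }" comes back in
   floating-point registers, which is what the co-processor check above
   detects.  */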
5209 const struct pcs_attribute_arg
5211 const char *arg;
5212 enum arm_pcs value;
5213 } pcs_attribute_args[] =
5215 {"aapcs", ARM_PCS_AAPCS},
5216 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5217 #if 0
5218 /* We could recognize these, but changes would be needed elsewhere
5219 * to implement them. */
5220 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5221 {"atpcs", ARM_PCS_ATPCS},
5222 {"apcs", ARM_PCS_APCS},
5223 #endif
5224 {NULL, ARM_PCS_UNKNOWN}
5227 static enum arm_pcs
5228 arm_pcs_from_attribute (tree attr)
5230 const struct pcs_attribute_arg *ptr;
5231 const char *arg;
5233 /* Get the value of the argument. */
5234 if (TREE_VALUE (attr) == NULL_TREE
5235 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5236 return ARM_PCS_UNKNOWN;
5238 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5240 /* Check it against the list of known arguments. */
5241 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5242 if (streq (arg, ptr->arg))
5243 return ptr->value;
5245 /* An unrecognized PCS variant. */
5246 return ARM_PCS_UNKNOWN;
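/* The attribute parsed here is written on a function declaration, for
   example

     double f2d (float) __attribute__ ((pcs ("aapcs")));

   which selects the base (core-register) calling convention for that
   function even when the default PCS is aapcs-vfp.  */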
5249 /* Get the PCS variant to use for this call. TYPE is the function's type
5250 specification, DECL is the specific declaration. DECL may be null if
5251 the call could be indirect or if this is a library call. */
5252 static enum arm_pcs
5253 arm_get_pcs_model (const_tree type, const_tree decl)
5255 bool user_convention = false;
5256 enum arm_pcs user_pcs = arm_pcs_default;
5257 tree attr;
5259 gcc_assert (type);
5261 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5262 if (attr)
5264 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5265 user_convention = true;
5268 if (TARGET_AAPCS_BASED)
5270 /* Detect varargs functions. These always use the base rules
5271 (no argument is ever a candidate for a co-processor
5272 register). */
5273 bool base_rules = stdarg_p (type);
5275 if (user_convention)
5277 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5278 sorry ("non-AAPCS derived PCS variant");
5279 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5280 error ("variadic functions must use the base AAPCS variant");
5283 if (base_rules)
5284 return ARM_PCS_AAPCS;
5285 else if (user_convention)
5286 return user_pcs;
5287 else if (decl && flag_unit_at_a_time)
5289 /* Local functions never leak outside this compilation unit,
5290 so we are free to use whatever conventions are
5291 appropriate. */
5292 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5293 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5294 if (i && i->local)
5295 return ARM_PCS_AAPCS_LOCAL;
5298 else if (user_convention && user_pcs != arm_pcs_default)
5299 sorry ("PCS variant");
5301 /* For everything else we use the target's default. */
5302 return arm_pcs_default;
5306 static void
5307 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5308 const_tree fntype ATTRIBUTE_UNUSED,
5309 rtx libcall ATTRIBUTE_UNUSED,
5310 const_tree fndecl ATTRIBUTE_UNUSED)
5312 /* Record the unallocated VFP registers. */
5313 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5314 pcum->aapcs_vfp_reg_alloc = 0;
5317 /* Walk down the type tree of TYPE counting consecutive base elements.
5318 If *MODEP is VOIDmode, then set it to the first valid floating point
5319 type. If a non-floating point type is found, or if a floating point
5320 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5321 otherwise return the count in the sub-tree. */
5322 static int
5323 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5325 machine_mode mode;
5326 HOST_WIDE_INT size;
5328 switch (TREE_CODE (type))
5330 case REAL_TYPE:
5331 mode = TYPE_MODE (type);
5332 if (mode != DFmode && mode != SFmode)
5333 return -1;
5335 if (*modep == VOIDmode)
5336 *modep = mode;
5338 if (*modep == mode)
5339 return 1;
5341 break;
5343 case COMPLEX_TYPE:
5344 mode = TYPE_MODE (TREE_TYPE (type));
5345 if (mode != DFmode && mode != SFmode)
5346 return -1;
5348 if (*modep == VOIDmode)
5349 *modep = mode;
5351 if (*modep == mode)
5352 return 2;
5354 break;
5356 case VECTOR_TYPE:
5357 /* Use V2SImode and V4SImode as representatives of all 64-bit
5358 and 128-bit vector types, whether or not those modes are
5359 supported with the present options. */
5360 size = int_size_in_bytes (type);
5361 switch (size)
5363 case 8:
5364 mode = V2SImode;
5365 break;
5366 case 16:
5367 mode = V4SImode;
5368 break;
5369 default:
5370 return -1;
5373 if (*modep == VOIDmode)
5374 *modep = mode;
5376 /* Vector modes are considered to be opaque: two vectors are
5377 equivalent for the purposes of being homogeneous aggregates
5378 if they are the same size. */
5379 if (*modep == mode)
5380 return 1;
5382 break;
5384 case ARRAY_TYPE:
5386 int count;
5387 tree index = TYPE_DOMAIN (type);
5389 /* Can't handle incomplete types nor sizes that are not
5390 fixed. */
5391 if (!COMPLETE_TYPE_P (type)
5392 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5393 return -1;
5395 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5396 if (count == -1
5397 || !index
5398 || !TYPE_MAX_VALUE (index)
5399 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5400 || !TYPE_MIN_VALUE (index)
5401 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5402 || count < 0)
5403 return -1;
5405 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5406 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5408 /* There must be no padding. */
5409 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5410 return -1;
5412 return count;
5415 case RECORD_TYPE:
5417 int count = 0;
5418 int sub_count;
5419 tree field;
5421 /* Can't handle incomplete types nor sizes that are not
5422 fixed. */
5423 if (!COMPLETE_TYPE_P (type)
5424 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5425 return -1;
5427 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5429 if (TREE_CODE (field) != FIELD_DECL)
5430 continue;
5432 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5433 if (sub_count < 0)
5434 return -1;
5435 count += sub_count;
5438 /* There must be no padding. */
5439 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5440 return -1;
5442 return count;
5445 case UNION_TYPE:
5446 case QUAL_UNION_TYPE:
5448 /* These aren't very interesting except in a degenerate case. */
5449 int count = 0;
5450 int sub_count;
5451 tree field;
5453 /* Can't handle incomplete types nor sizes that are not
5454 fixed. */
5455 if (!COMPLETE_TYPE_P (type)
5456 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5457 return -1;
5459 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5461 if (TREE_CODE (field) != FIELD_DECL)
5462 continue;
5464 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5465 if (sub_count < 0)
5466 return -1;
5467 count = count > sub_count ? count : sub_count;
5470 /* There must be no padding. */
5471 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5472 return -1;
5474 return count;
5477 default:
5478 break;
5481 return -1;
5484 /* Return true if PCS_VARIANT should use VFP registers. */
5485 static bool
5486 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5488 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5490 static bool seen_thumb1_vfp = false;
5492 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5494 sorry ("Thumb-1 hard-float VFP ABI");
5495 /* sorry() is not immediately fatal, so only display this once. */
5496 seen_thumb1_vfp = true;
5499 return true;
5502 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5503 return false;
5505 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5506 (TARGET_VFP_DOUBLE || !is_double));
5509 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5510 suitable for passing or returning in VFP registers for the PCS
5511 variant selected. If it is, then *BASE_MODE is updated to contain
5512 a machine mode describing each element of the argument's type and
5513 *COUNT to hold the number of such elements. */
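/* Illustrative sketch (hypothetical types): for  struct { float s[4]; }
   this returns true with *BASE_MODE == SFmode and *COUNT == 4, while a
   five-element float aggregate is rejected, since a homogeneous
   aggregate may have at most four members.  */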
5514 static bool
5515 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5516 machine_mode mode, const_tree type,
5517 machine_mode *base_mode, int *count)
5519 machine_mode new_mode = VOIDmode;
5521 /* If we have the type information, prefer that to working things
5522 out from the mode. */
5523 if (type)
5525 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5527 if (ag_count > 0 && ag_count <= 4)
5528 *count = ag_count;
5529 else
5530 return false;
5532 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5533 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5534 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5536 *count = 1;
5537 new_mode = mode;
5539 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5541 *count = 2;
5542 new_mode = (mode == DCmode ? DFmode : SFmode);
5544 else
5545 return false;
5548 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5549 return false;
5551 *base_mode = new_mode;
5552 return true;
5555 static bool
5556 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5557 machine_mode mode, const_tree type)
5559 int count ATTRIBUTE_UNUSED;
5560 machine_mode ag_mode ATTRIBUTE_UNUSED;
5562 if (!use_vfp_abi (pcs_variant, false))
5563 return false;
5564 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5565 &ag_mode, &count);
5568 static bool
5569 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5570 const_tree type)
5572 if (!use_vfp_abi (pcum->pcs_variant, false))
5573 return false;
5575 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5576 &pcum->aapcs_vfp_rmode,
5577 &pcum->aapcs_vfp_rcount);
5580 static bool
5581 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5582 const_tree type ATTRIBUTE_UNUSED)
5584 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5585 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5586 int regno;
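  /* Worked example of the search below: for two DFmode elements, SHIFT
     is 2 and MASK is 0xf, so we look for four consecutive free
     single-precision registers starting at an even S register.  */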
5588 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5589 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5591 pcum->aapcs_vfp_reg_alloc = mask << regno;
5592 if (mode == BLKmode
5593 || (mode == TImode && ! TARGET_NEON)
5594 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5596 int i;
5597 int rcount = pcum->aapcs_vfp_rcount;
5598 int rshift = shift;
5599 machine_mode rmode = pcum->aapcs_vfp_rmode;
5600 rtx par;
5601 if (!TARGET_NEON)
5603 /* Avoid using unsupported vector modes. */
5604 if (rmode == V2SImode)
5605 rmode = DImode;
5606 else if (rmode == V4SImode)
5608 rmode = DImode;
5609 rcount *= 2;
5610 rshift /= 2;
5613 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5614 for (i = 0; i < rcount; i++)
5616 rtx tmp = gen_rtx_REG (rmode,
5617 FIRST_VFP_REGNUM + regno + i * rshift);
5618 tmp = gen_rtx_EXPR_LIST
5619 (VOIDmode, tmp,
5620 GEN_INT (i * GET_MODE_SIZE (rmode)));
5621 XVECEXP (par, 0, i) = tmp;
5624 pcum->aapcs_reg = par;
5626 else
5627 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5628 return true;
5630 return false;
5633 static rtx
5634 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5635 machine_mode mode,
5636 const_tree type ATTRIBUTE_UNUSED)
5638 if (!use_vfp_abi (pcs_variant, false))
5639 return NULL;
5641 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5643 int count;
5644 machine_mode ag_mode;
5645 int i;
5646 rtx par;
5647 int shift;
5649 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5650 &ag_mode, &count);
5652 if (!TARGET_NEON)
5654 if (ag_mode == V2SImode)
5655 ag_mode = DImode;
5656 else if (ag_mode == V4SImode)
5658 ag_mode = DImode;
5659 count *= 2;
5662 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5663 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5664 for (i = 0; i < count; i++)
5666 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5667 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5668 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5669 XVECEXP (par, 0, i) = tmp;
5672 return par;
5675 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5678 static void
5679 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5680 machine_mode mode ATTRIBUTE_UNUSED,
5681 const_tree type ATTRIBUTE_UNUSED)
5683 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5684 pcum->aapcs_vfp_reg_alloc = 0;
5685 return;
5688 #define AAPCS_CP(X) \
5690 aapcs_ ## X ## _cum_init, \
5691 aapcs_ ## X ## _is_call_candidate, \
5692 aapcs_ ## X ## _allocate, \
5693 aapcs_ ## X ## _is_return_candidate, \
5694 aapcs_ ## X ## _allocate_return_reg, \
5695 aapcs_ ## X ## _advance \
5698 /* Table of co-processors that can be used to pass arguments in
5699 registers. Ideally no argument should be a candidate for more than
5700 one co-processor table entry, but the table is processed in order
5701 and stops after the first match. If that entry then fails to put
5702 the argument into a co-processor register, the argument will go on
5703 the stack. */
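/* At present the only entry is the VFP co-processor, added via
   AAPCS_CP (vfp) below.  */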
5704 static struct
5706 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5707 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5709 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5710 BLKmode) is a candidate for this co-processor's registers; this
5711 function should ignore any position-dependent state in
5712 CUMULATIVE_ARGS and only use call-type dependent information. */
5713 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5715 /* Return true if the argument does get a co-processor register; it
5716 should set aapcs_reg to an RTX of the register allocated as is
5717 required for a return from FUNCTION_ARG. */
5718 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5720 /* Return true if a result of mode MODE (or type TYPE if MODE is
5721 BLKmode) can be returned in this co-processor's registers. */
5722 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5724 /* Allocate and return an RTX element to hold the return type of a
5725 call; this routine must not fail and will only be called if
5726 is_return_candidate returned true with the same parameters. */
5727 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5729 /* Finish processing this argument and prepare to start processing
5730 the next one. */
5731 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5732 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5734 AAPCS_CP(vfp)
5737 #undef AAPCS_CP
5739 static int
5740 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5741 const_tree type)
5743 int i;
5745 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5746 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5747 return i;
5749 return -1;
5752 static int
5753 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5755 /* We aren't passed a decl, so we can't check that a call is local.
5756 However, it isn't clear that that would be a win anyway, since it
5757 might limit some tail-calling opportunities. */
5758 enum arm_pcs pcs_variant;
5760 if (fntype)
5762 const_tree fndecl = NULL_TREE;
5764 if (TREE_CODE (fntype) == FUNCTION_DECL)
5766 fndecl = fntype;
5767 fntype = TREE_TYPE (fntype);
5770 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5772 else
5773 pcs_variant = arm_pcs_default;
5775 if (pcs_variant != ARM_PCS_AAPCS)
5777 int i;
5779 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5780 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5781 TYPE_MODE (type),
5782 type))
5783 return i;
5785 return -1;
5788 static rtx
5789 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5790 const_tree fntype)
5792 /* We aren't passed a decl, so we can't check that a call is local.
5793 However, it isn't clear that that would be a win anyway, since it
5794 might limit some tail-calling opportunities. */
5795 enum arm_pcs pcs_variant;
5796 int unsignedp ATTRIBUTE_UNUSED;
5798 if (fntype)
5800 const_tree fndecl = NULL_TREE;
5802 if (TREE_CODE (fntype) == FUNCTION_DECL)
5804 fndecl = fntype;
5805 fntype = TREE_TYPE (fntype);
5808 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5810 else
5811 pcs_variant = arm_pcs_default;
5813 /* Promote integer types. */
5814 if (type && INTEGRAL_TYPE_P (type))
5815 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5817 if (pcs_variant != ARM_PCS_AAPCS)
5819 int i;
5821 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5822 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5823 type))
5824 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5825 mode, type);
5828 /* Promotes small structs returned in a register to full-word size
5829 for big-endian AAPCS. */
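  /* For example, a 3-byte aggregate is rounded up to 4 bytes here and
     returned in SImode, so that on big-endian targets the value sits at
     the most significant end of r0.  */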
5830 if (type && arm_return_in_msb (type))
5832 HOST_WIDE_INT size = int_size_in_bytes (type);
5833 if (size % UNITS_PER_WORD != 0)
5835 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5836 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5840 return gen_rtx_REG (mode, R0_REGNUM);
5843 static rtx
5844 aapcs_libcall_value (machine_mode mode)
5846 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5847 && GET_MODE_SIZE (mode) <= 4)
5848 mode = SImode;
5850 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5853 /* Lay out a function argument using the AAPCS rules. The rule
5854 numbers referred to here are those in the AAPCS. */
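/* Worked example (hypothetical signature): for  f (int a, double b)
   under the base, soft-float PCS, A is allocated to r0; B requires
   doubleword alignment, so rule C3 rounds the next core register
   number up to 2 and B occupies r2/r3.  */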
5855 static void
5856 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5857 const_tree type, bool named)
5859 int nregs, nregs2;
5860 int ncrn;
5862 /* We only need to do this once per argument. */
5863 if (pcum->aapcs_arg_processed)
5864 return;
5866 pcum->aapcs_arg_processed = true;
5868 /* Special case: if named is false then we are handling an incoming
5869 anonymous argument which is on the stack. */
5870 if (!named)
5871 return;
5873 /* Is this a potential co-processor register candidate? */
5874 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5876 int slot = aapcs_select_call_coproc (pcum, mode, type);
5877 pcum->aapcs_cprc_slot = slot;
5879 /* We don't have to apply any of the rules from part B of the
5880 preparation phase, these are handled elsewhere in the
5881 compiler. */
5883 if (slot >= 0)
5885 /* A Co-processor register candidate goes either in its own
5886 class of registers or on the stack. */
5887 if (!pcum->aapcs_cprc_failed[slot])
5889 /* C1.cp - Try to allocate the argument to co-processor
5890 registers. */
5891 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5892 return;
5894 /* C2.cp - Put the argument on the stack and note that we
5895 can't assign any more candidates in this slot. We also
5896 need to note that we have allocated stack space, so that
5897 we won't later try to split a non-cprc candidate between
5898 core registers and the stack. */
5899 pcum->aapcs_cprc_failed[slot] = true;
5900 pcum->can_split = false;
5903 /* We didn't get a register, so this argument goes on the
5904 stack. */
5905 gcc_assert (pcum->can_split == false);
5906 return;
5910 /* C3 - For double-word aligned arguments, round the NCRN up to the
5911 next even number. */
5912 ncrn = pcum->aapcs_ncrn;
5913 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5914 ncrn++;
5916 nregs = ARM_NUM_REGS2(mode, type);
5918 /* Sigh, this test should really assert that nregs > 0, but a GCC
5919 extension allows empty structs and then gives them empty size; it
5920 then allows such a structure to be passed by value. For some of
5921 the code below we have to pretend that such an argument has
5922 non-zero size so that we 'locate' it correctly either in
5923 registers or on the stack. */
5924 gcc_assert (nregs >= 0);
5926 nregs2 = nregs ? nregs : 1;
5928 /* C4 - Argument fits entirely in core registers. */
5929 if (ncrn + nregs2 <= NUM_ARG_REGS)
5931 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5932 pcum->aapcs_next_ncrn = ncrn + nregs;
5933 return;
5936 /* C5 - Some core registers left and there are no arguments already
5937 on the stack: split this argument between the remaining core
5938 registers and the stack. */
5939 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5941 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5942 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5943 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5944 return;
5947 /* C6 - NCRN is set to 4. */
5948 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5950 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5951 return;
5954 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5955 for a call to a function whose data type is FNTYPE.
5956 For a library call, FNTYPE is NULL. */
5957 void
5958 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5959 rtx libname,
5960 tree fndecl ATTRIBUTE_UNUSED)
5962 /* Long call handling. */
5963 if (fntype)
5964 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5965 else
5966 pcum->pcs_variant = arm_pcs_default;
5968 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5970 if (arm_libcall_uses_aapcs_base (libname))
5971 pcum->pcs_variant = ARM_PCS_AAPCS;
5973 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5974 pcum->aapcs_reg = NULL_RTX;
5975 pcum->aapcs_partial = 0;
5976 pcum->aapcs_arg_processed = false;
5977 pcum->aapcs_cprc_slot = -1;
5978 pcum->can_split = true;
5980 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5982 int i;
5984 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5986 pcum->aapcs_cprc_failed[i] = false;
5987 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5990 return;
5993 /* Legacy ABIs */
5995 /* On the ARM, the offset starts at 0. */
5996 pcum->nregs = 0;
5997 pcum->iwmmxt_nregs = 0;
5998 pcum->can_split = true;
6000 /* Varargs vectors are treated the same as long long.
6001 named_count avoids having to change the way arm handles 'named' */
6002 pcum->named_count = 0;
6003 pcum->nargs = 0;
6005 if (TARGET_REALLY_IWMMXT && fntype)
6007 tree fn_arg;
6009 for (fn_arg = TYPE_ARG_TYPES (fntype);
6010 fn_arg;
6011 fn_arg = TREE_CHAIN (fn_arg))
6012 pcum->named_count += 1;
6014 if (! pcum->named_count)
6015 pcum->named_count = INT_MAX;
6019 /* Return true if mode/type need doubleword alignment. */
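/* For example, DImode and DFmode values are 64-bit aligned on this
   target and so exceed the 32-bit PARM_BOUNDARY, as does a type whose
   user-specified alignment is larger than 32 bits; plain SImode does
   not.  */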
6020 static bool
6021 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6023 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6024 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6028 /* Determine where to put an argument to a function.
6029 Value is zero to push the argument on the stack,
6030 or a hard register in which to store the argument.
6032 MODE is the argument's machine mode.
6033 TYPE is the data type of the argument (as a tree).
6034 This is null for libcalls where that information may
6035 not be available.
6036 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6037 the preceding args and about the function being called.
6038 NAMED is nonzero if this argument is a named parameter
6039 (otherwise it is an extra parameter matching an ellipsis).
6041 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6042 other arguments are passed on the stack. If (NAMED == 0) (which happens
6043 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6044 defined), say it is passed on the stack (function_prologue will
6045 indeed make it be passed on the stack if necessary). */
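/* For instance (hypothetical signature), f (int a, int b, int c, int d, int e)
   places A..D in r0-r3 and E on the stack.  */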
6047 static rtx
6048 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6049 const_tree type, bool named)
6051 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6052 int nregs;
6054 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6055 a call insn (op3 of a call_value insn). */
6056 if (mode == VOIDmode)
6057 return const0_rtx;
6059 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6061 aapcs_layout_arg (pcum, mode, type, named);
6062 return pcum->aapcs_reg;
6065 /* Varargs vectors are treated the same as long long.
6066 named_count avoids having to change the way arm handles 'named' */
6067 if (TARGET_IWMMXT_ABI
6068 && arm_vector_mode_supported_p (mode)
6069 && pcum->named_count > pcum->nargs + 1)
6071 if (pcum->iwmmxt_nregs <= 9)
6072 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6073 else
6075 pcum->can_split = false;
6076 return NULL_RTX;
6080 /* Put doubleword aligned quantities in even register pairs. */
6081 if (pcum->nregs & 1
6082 && ARM_DOUBLEWORD_ALIGN
6083 && arm_needs_doubleword_align (mode, type))
6084 pcum->nregs++;
6086 /* Only allow splitting an arg between regs and memory if all preceding
6087 args were allocated to regs. For args passed by reference we only count
6088 the reference pointer. */
6089 if (pcum->can_split)
6090 nregs = 1;
6091 else
6092 nregs = ARM_NUM_REGS2 (mode, type);
6094 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6095 return NULL_RTX;
6097 return gen_rtx_REG (mode, pcum->nregs);
6100 static unsigned int
6101 arm_function_arg_boundary (machine_mode mode, const_tree type)
6103 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6104 ? DOUBLEWORD_ALIGNMENT
6105 : PARM_BOUNDARY);
6108 static int
6109 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6110 tree type, bool named)
6112 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6113 int nregs = pcum->nregs;
6115 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6117 aapcs_layout_arg (pcum, mode, type, named);
6118 return pcum->aapcs_partial;
6121 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6122 return 0;
6124 if (NUM_ARG_REGS > nregs
6125 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6126 && pcum->can_split)
6127 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6129 return 0;
6132 /* Update the data in PCUM to advance over an argument
6133 of mode MODE and data type TYPE.
6134 (TYPE is null for libcalls where that information may not be available.) */
6136 static void
6137 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6138 const_tree type, bool named)
6140 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6142 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6144 aapcs_layout_arg (pcum, mode, type, named);
6146 if (pcum->aapcs_cprc_slot >= 0)
6148 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6149 type);
6150 pcum->aapcs_cprc_slot = -1;
6153 /* Generic stuff. */
6154 pcum->aapcs_arg_processed = false;
6155 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6156 pcum->aapcs_reg = NULL_RTX;
6157 pcum->aapcs_partial = 0;
6159 else
6161 pcum->nargs += 1;
6162 if (arm_vector_mode_supported_p (mode)
6163 && pcum->named_count > pcum->nargs
6164 && TARGET_IWMMXT_ABI)
6165 pcum->iwmmxt_nregs += 1;
6166 else
6167 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6171 /* Variable sized types are passed by reference. This is a GCC
6172 extension to the ARM ABI. */
6174 static bool
6175 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6176 machine_mode mode ATTRIBUTE_UNUSED,
6177 const_tree type, bool named ATTRIBUTE_UNUSED)
6179 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6182 /* Encode the current state of the #pragma [no_]long_calls. */
6183 typedef enum
6185 OFF, /* No #pragma [no_]long_calls is in effect. */
6186 LONG, /* #pragma long_calls is in effect. */
6187 SHORT /* #pragma no_long_calls is in effect. */
6188 } arm_pragma_enum;
6190 static arm_pragma_enum arm_pragma_long_calls = OFF;
6192 void
6193 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6195 arm_pragma_long_calls = LONG;
6198 void
6199 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6201 arm_pragma_long_calls = SHORT;
6204 void
6205 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6207 arm_pragma_long_calls = OFF;
6210 /* Handle an attribute requiring a FUNCTION_DECL;
6211 arguments as in struct attribute_spec.handler. */
6212 static tree
6213 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6214 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6216 if (TREE_CODE (*node) != FUNCTION_DECL)
6218 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6219 name);
6220 *no_add_attrs = true;
6223 return NULL_TREE;
6226 /* Handle an "interrupt" or "isr" attribute;
6227 arguments as in struct attribute_spec.handler. */
6228 static tree
6229 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6230 bool *no_add_attrs)
6232 if (DECL_P (*node))
6234 if (TREE_CODE (*node) != FUNCTION_DECL)
6236 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6237 name);
6238 *no_add_attrs = true;
6240 /* FIXME: the argument if any is checked for type attributes;
6241 should it be checked for decl ones? */
6243 else
6245 if (TREE_CODE (*node) == FUNCTION_TYPE
6246 || TREE_CODE (*node) == METHOD_TYPE)
6248 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6250 warning (OPT_Wattributes, "%qE attribute ignored",
6251 name);
6252 *no_add_attrs = true;
6255 else if (TREE_CODE (*node) == POINTER_TYPE
6256 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6257 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6258 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6260 *node = build_variant_type_copy (*node);
6261 TREE_TYPE (*node) = build_type_attribute_variant
6262 (TREE_TYPE (*node),
6263 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6264 *no_add_attrs = true;
6266 else
6268 /* Possibly pass this attribute on from the type to a decl. */
6269 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6270 | (int) ATTR_FLAG_FUNCTION_NEXT
6271 | (int) ATTR_FLAG_ARRAY_NEXT))
6273 *no_add_attrs = true;
6274 return tree_cons (name, args, NULL_TREE);
6276 else
6278 warning (OPT_Wattributes, "%qE attribute ignored",
6279 name);
6284 return NULL_TREE;
6287 /* Handle a "pcs" attribute; arguments as in struct
6288 attribute_spec.handler. */
6289 static tree
6290 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6291 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6293 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6295 warning (OPT_Wattributes, "%qE attribute ignored", name);
6296 *no_add_attrs = true;
6298 return NULL_TREE;
6301 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6302 /* Handle the "notshared" attribute. This attribute is another way of
6303 requesting hidden visibility. ARM's compiler supports
6304 "__declspec(notshared)"; we support the same thing via an
6305 attribute. */
6307 static tree
6308 arm_handle_notshared_attribute (tree *node,
6309 tree name ATTRIBUTE_UNUSED,
6310 tree args ATTRIBUTE_UNUSED,
6311 int flags ATTRIBUTE_UNUSED,
6312 bool *no_add_attrs)
6314 tree decl = TYPE_NAME (*node);
6316 if (decl)
6318 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6319 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6320 *no_add_attrs = false;
6322 return NULL_TREE;
6324 #endif
6326 /* Return 0 if the attributes for two types are incompatible, 1 if they
6327 are compatible, and 2 if they are nearly compatible (which causes a
6328 warning to be generated). */
6329 static int
6330 arm_comp_type_attributes (const_tree type1, const_tree type2)
6332 int l1, l2, s1, s2;
6334 /* Check for mismatch of non-default calling convention. */
6335 if (TREE_CODE (type1) != FUNCTION_TYPE)
6336 return 1;
6338 /* Check for mismatched call attributes. */
6339 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6340 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6341 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6342 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6344 /* Only bother to check if an attribute is defined. */
6345 if (l1 | l2 | s1 | s2)
6347 /* If one type has an attribute, the other must have the same attribute. */
6348 if ((l1 != l2) || (s1 != s2))
6349 return 0;
6351 /* Disallow mixed attributes. */
6352 if ((l1 & s2) || (l2 & s1))
6353 return 0;
6356 /* Check for mismatched ISR attribute. */
6357 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6358 if (! l1)
6359 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6360 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6361 if (! l2)
6362 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6363 if (l1 != l2)
6364 return 0;
6366 return 1;
6369 /* Assign default attributes to a newly defined type. This is used to
6370 set short_call/long_call attributes for function types of
6371 functions defined inside corresponding #pragma scopes. */
6372 static void
6373 arm_set_default_type_attributes (tree type)
6375 /* Add __attribute__ ((long_call)) to all functions when
6376 inside #pragma long_calls, or __attribute__ ((short_call))
6377 when inside #pragma no_long_calls. */
6378 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6380 tree type_attr_list, attr_name;
6381 type_attr_list = TYPE_ATTRIBUTES (type);
6383 if (arm_pragma_long_calls == LONG)
6384 attr_name = get_identifier ("long_call");
6385 else if (arm_pragma_long_calls == SHORT)
6386 attr_name = get_identifier ("short_call");
6387 else
6388 return;
6390 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6391 TYPE_ATTRIBUTES (type) = type_attr_list;
6395 /* Return true if DECL is known to be linked into section SECTION. */
6397 static bool
6398 arm_function_in_section_p (tree decl, section *section)
6400 /* We can only be certain about the prevailing symbol definition. */
6401 if (!decl_binds_to_current_def_p (decl))
6402 return false;
6404 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6405 if (!DECL_SECTION_NAME (decl))
6407 /* Make sure that we will not create a unique section for DECL. */
6408 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6409 return false;
6412 return function_section (decl) == section;
6415 /* Return nonzero if a 32-bit "long_call" should be generated for
6416 a call from the current function to DECL. We generate a long_call
6417 if the function:
6419 a. has an __attribute__ ((long_call))
6420 or b. is within the scope of a #pragma long_calls
6421 or c. the -mlong-calls command line switch has been specified
6423 However we do not generate a long call if the function:
6425 d. has an __attribute__ ((short_call))
6426 or e. is inside the scope of a #pragma no_long_calls
6427 or f. is defined in the same section as the current function. */
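/* Usage sketch (illustrative declarations, not from this file):

     void far_func (void) __attribute__ ((long_call));
     #pragma long_calls
     void also_far (void);
     #pragma long_calls_off

   Calls to both functions are then made by loading the full 32-bit
   address into a register rather than relying on the limited range of
   a BL instruction.  */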
6429 bool
6430 arm_is_long_call_p (tree decl)
6432 tree attrs;
6434 if (!decl)
6435 return TARGET_LONG_CALLS;
6437 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6438 if (lookup_attribute ("short_call", attrs))
6439 return false;
6441 /* For "f", be conservative, and only cater for cases in which the
6442 whole of the current function is placed in the same section. */
6443 if (!flag_reorder_blocks_and_partition
6444 && TREE_CODE (decl) == FUNCTION_DECL
6445 && arm_function_in_section_p (decl, current_function_section ()))
6446 return false;
6448 if (lookup_attribute ("long_call", attrs))
6449 return true;
6451 return TARGET_LONG_CALLS;
6454 /* Return nonzero if it is ok to make a tail-call to DECL. */
6455 static bool
6456 arm_function_ok_for_sibcall (tree decl, tree exp)
6458 unsigned long func_type;
6460 if (cfun->machine->sibcall_blocked)
6461 return false;
6463 /* Never tailcall something if we are generating code for Thumb-1. */
6464 if (TARGET_THUMB1)
6465 return false;
6467 /* The PIC register is live on entry to VxWorks PLT entries, so we
6468 must make the call before restoring the PIC register. */
6469 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6470 return false;
6472 /* If we are interworking and the function is not declared static
6473 then we can't tail-call it unless we know that it exists in this
6474 compilation unit (since it might be a Thumb routine). */
6475 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6476 && !TREE_ASM_WRITTEN (decl))
6477 return false;
6479 func_type = arm_current_func_type ();
6480 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6481 if (IS_INTERRUPT (func_type))
6482 return false;
6484 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6486 /* Check that the return value locations are the same. For
6487 example that we aren't returning a value from the sibling in
6488 a VFP register but then need to transfer it to a core
6489 register. */
6490 rtx a, b;
6492 a = arm_function_value (TREE_TYPE (exp), decl, false);
6493 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6494 cfun->decl, false);
6495 if (!rtx_equal_p (a, b))
6496 return false;
6499 /* Never tailcall if function may be called with a misaligned SP. */
6500 if (IS_STACKALIGN (func_type))
6501 return false;
6503 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6504 references should become a NOP. Don't convert such calls into
6505 sibling calls. */
6506 if (TARGET_AAPCS_BASED
6507 && arm_abi == ARM_ABI_AAPCS
6508 && decl
6509 && DECL_WEAK (decl))
6510 return false;
6512 /* Everything else is ok. */
6513 return true;
6517 /* Addressing mode support functions. */
6519 /* Return nonzero if X is a legitimate immediate operand when compiling
6520 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6521 int
6522 legitimate_pic_operand_p (rtx x)
6524 if (GET_CODE (x) == SYMBOL_REF
6525 || (GET_CODE (x) == CONST
6526 && GET_CODE (XEXP (x, 0)) == PLUS
6527 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6528 return 0;
6530 return 1;
6533 /* Record that the current function needs a PIC register. Initialize
6534 cfun->machine->pic_reg if we have not already done so. */
6536 static void
6537 require_pic_register (void)
6539 /* A lot of the logic here is made obscure by the fact that this
6540 routine gets called as part of the rtx cost estimation process.
6541 We don't want those calls to affect any assumptions about the real
6542 function; and further, we can't call entry_of_function() until we
6543 start the real expansion process. */
6544 if (!crtl->uses_pic_offset_table)
6546 gcc_assert (can_create_pseudo_p ());
6547 if (arm_pic_register != INVALID_REGNUM
6548 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6550 if (!cfun->machine->pic_reg)
6551 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6553 /* Play games to avoid marking the function as needing pic
6554 if we are being called as part of the cost-estimation
6555 process. */
6556 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6557 crtl->uses_pic_offset_table = 1;
6559 else
6561 rtx_insn *seq, *insn;
6563 if (!cfun->machine->pic_reg)
6564 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6566 /* Play games to avoid marking the function as needing pic
6567 if we are being called as part of the cost-estimation
6568 process. */
6569 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6571 crtl->uses_pic_offset_table = 1;
6572 start_sequence ();
6574 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6575 && arm_pic_register > LAST_LO_REGNUM)
6576 emit_move_insn (cfun->machine->pic_reg,
6577 gen_rtx_REG (Pmode, arm_pic_register));
6578 else
6579 arm_load_pic_register (0UL);
6581 seq = get_insns ();
6582 end_sequence ();
6584 for (insn = seq; insn; insn = NEXT_INSN (insn))
6585 if (INSN_P (insn))
6586 INSN_LOCATION (insn) = prologue_location;
6588 /* We can be called during expansion of PHI nodes, where
6589 we can't yet emit instructions directly in the final
6590 insn stream. Queue the insns on the entry edge, they will
6591 be committed after everything else is expanded. */
6592 insert_insn_on_edge (seq,
6593 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6599 rtx
6600 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6602 if (GET_CODE (orig) == SYMBOL_REF
6603 || GET_CODE (orig) == LABEL_REF)
6605 rtx insn;
6607 if (reg == 0)
6609 gcc_assert (can_create_pseudo_p ());
6610 reg = gen_reg_rtx (Pmode);
6613 /* VxWorks does not impose a fixed gap between segments; the run-time
6614 gap can be different from the object-file gap. We therefore can't
6615 use GOTOFF unless we are absolutely sure that the symbol is in the
6616 same segment as the GOT. Unfortunately, the flexibility of linker
6617 scripts means that we can't be sure of that in general, so assume
6618 that GOTOFF is never valid on VxWorks. */
6619 if ((GET_CODE (orig) == LABEL_REF
6620 || (GET_CODE (orig) == SYMBOL_REF &&
6621 SYMBOL_REF_LOCAL_P (orig)))
6622 && NEED_GOT_RELOC
6623 && arm_pic_data_is_text_relative)
6624 insn = arm_pic_static_addr (orig, reg);
6625 else
6627 rtx pat;
6628 rtx mem;
6630 /* If this function doesn't have a pic register, create one now. */
6631 require_pic_register ();
6633 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6635 /* Make the MEM as close to a constant as possible. */
6636 mem = SET_SRC (pat);
6637 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6638 MEM_READONLY_P (mem) = 1;
6639 MEM_NOTRAP_P (mem) = 1;
6641 insn = emit_insn (pat);
6644 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6645 by loop. */
6646 set_unique_reg_note (insn, REG_EQUAL, orig);
6648 return reg;
6650 else if (GET_CODE (orig) == CONST)
6652 rtx base, offset;
6654 if (GET_CODE (XEXP (orig, 0)) == PLUS
6655 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6656 return orig;
6658 /* Handle the case where we have: const (UNSPEC_TLS). */
6659 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6660 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6661 return orig;
6663 /* Handle the case where we have:
6664 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6665 CONST_INT. */
6666 if (GET_CODE (XEXP (orig, 0)) == PLUS
6667 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6668 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6670 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6671 return orig;
6674 if (reg == 0)
6676 gcc_assert (can_create_pseudo_p ());
6677 reg = gen_reg_rtx (Pmode);
6680 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6682 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6683 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6684 base == reg ? 0 : reg);
6686 if (CONST_INT_P (offset))
6688 /* The base register doesn't really matter, we only want to
6689 test the index for the appropriate mode. */
6690 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6692 gcc_assert (can_create_pseudo_p ());
6693 offset = force_reg (Pmode, offset);
6696 if (CONST_INT_P (offset))
6697 return plus_constant (Pmode, base, INTVAL (offset));
6700 if (GET_MODE_SIZE (mode) > 4
6701 && (GET_MODE_CLASS (mode) == MODE_INT
6702 || TARGET_SOFT_FLOAT))
6704 emit_insn (gen_addsi3 (reg, base, offset));
6705 return reg;
6708 return gen_rtx_PLUS (Pmode, base, offset);
6711 return orig;
6715 /* Find a spare register to use during the prolog of a function. */
6717 static int
6718 thumb_find_work_register (unsigned long pushed_regs_mask)
6720 int reg;
6722 /* Check the argument registers first as these are call-used. The
6723 register allocation order means that sometimes r3 might be used
6724 but earlier argument registers might not, so check them all. */
6725 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6726 if (!df_regs_ever_live_p (reg))
6727 return reg;
6729 /* Before going on to check the call-saved registers we can try a couple
6730 more ways of deducing that r3 is available. The first is when we are
6731 pushing anonymous arguments onto the stack and we have less than 4
6732 registers worth of fixed arguments(*). In this case r3 will be part of
6733 the variable argument list and so we can be sure that it will be
6734 pushed right at the start of the function. Hence it will be available
6735 for the rest of the prologue.
6736 (*): ie crtl->args.pretend_args_size is greater than 0. */
6737 if (cfun->machine->uses_anonymous_args
6738 && crtl->args.pretend_args_size > 0)
6739 return LAST_ARG_REGNUM;
6741 /* The other case is when we have fixed arguments but less than 4 registers
6742 worth. In this case r3 might be used in the body of the function, but
6743 it is not being used to convey an argument into the function. In theory
6744 we could just check crtl->args.size to see how many bytes are
6745 being passed in argument registers, but it seems that it is unreliable.
6746 Sometimes it will have the value 0 when in fact arguments are being
6747 passed. (See testcase execute/20021111-1.c for an example). So we also
6748 check the args_info.nregs field as well. The problem with this field is
6749 that it makes no allowances for arguments that are passed to the
6750 function but which are not used. Hence we could miss an opportunity
6751 when a function has an unused argument in r3. But it is better to be
6752 safe than to be sorry. */
6753 if (! cfun->machine->uses_anonymous_args
6754 && crtl->args.size >= 0
6755 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6756 && (TARGET_AAPCS_BASED
6757 ? crtl->args.info.aapcs_ncrn < 4
6758 : crtl->args.info.nregs < 4))
6759 return LAST_ARG_REGNUM;
6761 /* Otherwise look for a call-saved register that is going to be pushed. */
6762 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6763 if (pushed_regs_mask & (1 << reg))
6764 return reg;
6766 if (TARGET_THUMB2)
6768 /* Thumb-2 can use high regs. */
6769 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6770 if (pushed_regs_mask & (1 << reg))
6771 return reg;
6773 /* Something went wrong - thumb_compute_save_reg_mask()
6774 should have arranged for a suitable register to be pushed. */
6775 gcc_unreachable ();
6778 static GTY(()) int pic_labelno;
6780 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6781 low register. */
6783 void
6784 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6786 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6788 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6789 return;
6791 gcc_assert (flag_pic);
6793 pic_reg = cfun->machine->pic_reg;
6794 if (TARGET_VXWORKS_RTP)
6796 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6797 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6798 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6800 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6802 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6803 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6805 else
6807 /* We use an UNSPEC rather than a LABEL_REF because this label
6808 never appears in the code stream. */
6810 labelno = GEN_INT (pic_labelno++);
6811 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6812 l1 = gen_rtx_CONST (VOIDmode, l1);
6814 /* On the ARM the PC register contains 'dot + 8' at the time of the
6815 addition, on the Thumb it is 'dot + 4'. */
6816 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6817 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6818 UNSPEC_GOTSYM_OFF);
6819 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6821 if (TARGET_32BIT)
6823 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6825 else /* TARGET_THUMB1 */
6827 if (arm_pic_register != INVALID_REGNUM
6828 && REGNO (pic_reg) > LAST_LO_REGNUM)
6830 /* We will have pushed the pic register, so we should always be
6831 able to find a work register. */
6832 pic_tmp = gen_rtx_REG (SImode,
6833 thumb_find_work_register (saved_regs));
6834 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6835 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6836 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6838 else if (arm_pic_register != INVALID_REGNUM
6839 && arm_pic_register > LAST_LO_REGNUM
6840 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6842 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6843 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6844 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6846 else
6847 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6851 /* Need to emit this whether or not we obey regdecls,
6852 since setjmp/longjmp can cause life info to screw up. */
6853 emit_use (pic_reg);
6856 /* Generate code to load the address of a static var when flag_pic is set. */
6857 static rtx
6858 arm_pic_static_addr (rtx orig, rtx reg)
6860 rtx l1, labelno, offset_rtx, insn;
6862 gcc_assert (flag_pic);
6864 /* We use an UNSPEC rather than a LABEL_REF because this label
6865 never appears in the code stream. */
6866 labelno = GEN_INT (pic_labelno++);
6867 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6868 l1 = gen_rtx_CONST (VOIDmode, l1);
6870 /* On the ARM the PC register contains 'dot + 8' at the time of the
6871 addition, on the Thumb it is 'dot + 4'. */
6872 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6873 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6874 UNSPEC_SYMBOL_OFFSET);
6875 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6877 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6878 return insn;
6881 /* Return nonzero if X is valid as an ARM state addressing register. */
6882 static int
6883 arm_address_register_rtx_p (rtx x, int strict_p)
6885 int regno;
6887 if (!REG_P (x))
6888 return 0;
6890 regno = REGNO (x);
6892 if (strict_p)
6893 return ARM_REGNO_OK_FOR_BASE_P (regno);
6895 return (regno <= LAST_ARM_REGNUM
6896 || regno >= FIRST_PSEUDO_REGISTER
6897 || regno == FRAME_POINTER_REGNUM
6898 || regno == ARG_POINTER_REGNUM);
6901 /* Return TRUE if this rtx is the difference of a symbol and a label,
6902 and will reduce to a PC-relative relocation in the object file.
6903 Expressions like this can be left alone when generating PIC, rather
6904 than forced through the GOT. */
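/* The typical accepted shape is  (minus (symbol_ref "sym") (label_ref ...)),
   i.e. a symbol-minus-label difference.  */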
6905 static int
6906 pcrel_constant_p (rtx x)
6908 if (GET_CODE (x) == MINUS)
6909 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6911 return FALSE;
6914 /* Return true if X will surely end up in an index register after next
6915 splitting pass. */
6916 static bool
6917 will_be_in_index_register (const_rtx x)
6919 /* arm.md: calculate_pic_address will split this into a register. */
6920 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6923 /* Return nonzero if X is a valid ARM state address operand. */
6924 int
6925 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6926 int strict_p)
6928 bool use_ldrd;
6929 enum rtx_code code = GET_CODE (x);
6931 if (arm_address_register_rtx_p (x, strict_p))
6932 return 1;
6934 use_ldrd = (TARGET_LDRD
6935 && (mode == DImode
6936 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6938 if (code == POST_INC || code == PRE_DEC
6939 || ((code == PRE_INC || code == POST_DEC)
6940 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6941 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6943 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6944 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6945 && GET_CODE (XEXP (x, 1)) == PLUS
6946 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6948 rtx addend = XEXP (XEXP (x, 1), 1);
6950 /* Don't allow ldrd post increment by register because it's hard
6951 to fix up invalid register choices. */
6952 if (use_ldrd
6953 && GET_CODE (x) == POST_MODIFY
6954 && REG_P (addend))
6955 return 0;
6957 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6958 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6961 /* After reload constants split into minipools will have addresses
6962 from a LABEL_REF. */
6963 else if (reload_completed
6964 && (code == LABEL_REF
6965 || (code == CONST
6966 && GET_CODE (XEXP (x, 0)) == PLUS
6967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6968 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6969 return 1;
6971 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6972 return 0;
6974 else if (code == PLUS)
6976 rtx xop0 = XEXP (x, 0);
6977 rtx xop1 = XEXP (x, 1);
6979 return ((arm_address_register_rtx_p (xop0, strict_p)
6980 && ((CONST_INT_P (xop1)
6981 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6982 || (!strict_p && will_be_in_index_register (xop1))))
6983 || (arm_address_register_rtx_p (xop1, strict_p)
6984 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6987 #if 0
6988 /* Reload currently can't handle MINUS, so disable this for now */
6989 else if (GET_CODE (x) == MINUS)
6991 rtx xop0 = XEXP (x, 0);
6992 rtx xop1 = XEXP (x, 1);
6994 return (arm_address_register_rtx_p (xop0, strict_p)
6995 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6997 #endif
6999 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7000 && code == SYMBOL_REF
7001 && CONSTANT_POOL_ADDRESS_P (x)
7002 && ! (flag_pic
7003 && symbol_mentioned_p (get_pool_constant (x))
7004 && ! pcrel_constant_p (get_pool_constant (x))))
7005 return 1;
7007 return 0;
7010 /* Return nonzero if X is a valid Thumb-2 address operand. */
7011 static int
7012 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7014 bool use_ldrd;
7015 enum rtx_code code = GET_CODE (x);
7017 if (arm_address_register_rtx_p (x, strict_p))
7018 return 1;
7020 use_ldrd = (TARGET_LDRD
7021 && (mode == DImode
7022 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7024 if (code == POST_INC || code == PRE_DEC
7025 || ((code == PRE_INC || code == POST_DEC)
7026 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7027 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7029 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7030 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7031 && GET_CODE (XEXP (x, 1)) == PLUS
7032 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7034 /* Thumb-2 only has autoincrement by constant. */
7035 rtx addend = XEXP (XEXP (x, 1), 1);
7036 HOST_WIDE_INT offset;
7038 if (!CONST_INT_P (addend))
7039 return 0;
7041 offset = INTVAL(addend);
7042 if (GET_MODE_SIZE (mode) <= 4)
7043 return (offset > -256 && offset < 256);
7045 return (use_ldrd && offset > -1024 && offset < 1024
7046 && (offset & 3) == 0);
7049 /* After reload constants split into minipools will have addresses
7050 from a LABEL_REF. */
7051 else if (reload_completed
7052 && (code == LABEL_REF
7053 || (code == CONST
7054 && GET_CODE (XEXP (x, 0)) == PLUS
7055 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7056 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7057 return 1;
7059 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7060 return 0;
7062 else if (code == PLUS)
7064 rtx xop0 = XEXP (x, 0);
7065 rtx xop1 = XEXP (x, 1);
7067 return ((arm_address_register_rtx_p (xop0, strict_p)
7068 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7069 || (!strict_p && will_be_in_index_register (xop1))))
7070 || (arm_address_register_rtx_p (xop1, strict_p)
7071 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7074 /* Normally we can assign constant values to target registers without
7075 the help of the constant pool. But there are cases where we have to use
7076 the constant pool, for example:
7077 1) assigning a label to a register;
7078 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7080 A constant pool access of the form:
7081 (set (reg r0) (mem (symbol_ref (".LC0"))))
7082 will cause the use of the literal pool (later, in arm_reorg).
7083 So here we mark such a form as invalid, and the compiler will then
7084 adjust it into:
7085 (set (reg r0) (symbol_ref (".LC0")))
7086 (set (reg r0) (mem (reg r0))).
7087 No extra register is required, and (mem (reg r0)) won't cause the use
7088 of the literal pool. */
7089 else if (arm_disable_literal_pool && code == SYMBOL_REF
7090 && CONSTANT_POOL_ADDRESS_P (x))
7091 return 0;
7093 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7094 && code == SYMBOL_REF
7095 && CONSTANT_POOL_ADDRESS_P (x)
7096 && ! (flag_pic
7097 && symbol_mentioned_p (get_pool_constant (x))
7098 && ! pcrel_constant_p (get_pool_constant (x))))
7099 return 1;
7101 return 0;
7104 /* Return nonzero if INDEX is valid for an address index operand in
7105 ARM state. */
7106 static int
7107 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7108 int strict_p)
7110 HOST_WIDE_INT range;
7111 enum rtx_code code = GET_CODE (index);
7113 /* Standard coprocessor addressing modes. */
7114 if (TARGET_HARD_FLOAT
7115 && TARGET_VFP
7116 && (mode == SFmode || mode == DFmode))
7117 return (code == CONST_INT && INTVAL (index) < 1024
7118 && INTVAL (index) > -1024
7119 && (INTVAL (index) & 3) == 0);
7121 /* For quad modes, we restrict the constant offset to be slightly less
7122 than what the instruction format permits. We do this because for
7123 quad mode moves, we will actually decompose them into two separate
7124 double-mode reads or writes. INDEX must therefore be a valid
7125 (double-mode) offset and so should INDEX+8. */
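/* E.g. an offset of 1016 is rejected below, because the second
   double-mode access would then need offset 1024, which is outside the
   permitted range.  */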
7126 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7127 return (code == CONST_INT
7128 && INTVAL (index) < 1016
7129 && INTVAL (index) > -1024
7130 && (INTVAL (index) & 3) == 0);
7132 /* We have no such constraint on double mode offsets, so we permit the
7133 full range of the instruction format. */
7134 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7135 return (code == CONST_INT
7136 && INTVAL (index) < 1024
7137 && INTVAL (index) > -1024
7138 && (INTVAL (index) & 3) == 0);
7140 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7141 return (code == CONST_INT
7142 && INTVAL (index) < 1024
7143 && INTVAL (index) > -1024
7144 && (INTVAL (index) & 3) == 0);
7146 if (arm_address_register_rtx_p (index, strict_p)
7147 && (GET_MODE_SIZE (mode) <= 4))
7148 return 1;
7150 if (mode == DImode || mode == DFmode)
7152 if (code == CONST_INT)
7154 HOST_WIDE_INT val = INTVAL (index);
7156 if (TARGET_LDRD)
7157 return val > -256 && val < 256;
7158 else
7159 return val > -4096 && val < 4092;
7162 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7165 if (GET_MODE_SIZE (mode) <= 4
7166 && ! (arm_arch4
7167 && (mode == HImode
7168 || mode == HFmode
7169 || (mode == QImode && outer == SIGN_EXTEND))))
7171 if (code == MULT)
7173 rtx xiop0 = XEXP (index, 0);
7174 rtx xiop1 = XEXP (index, 1);
7176 return ((arm_address_register_rtx_p (xiop0, strict_p)
7177 && power_of_two_operand (xiop1, SImode))
7178 || (arm_address_register_rtx_p (xiop1, strict_p)
7179 && power_of_two_operand (xiop0, SImode)));
7181 else if (code == LSHIFTRT || code == ASHIFTRT
7182 || code == ASHIFT || code == ROTATERT)
7184 rtx op = XEXP (index, 1);
7186 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7187 && CONST_INT_P (op)
7188 && INTVAL (op) > 0
7189 && INTVAL (op) <= 31);
7193 /* For ARM v4 we may be doing a sign-extend operation during the
7194 load. */
7195 if (arm_arch4)
7197 if (mode == HImode
7198 || mode == HFmode
7199 || (outer == SIGN_EXTEND && mode == QImode))
7200 range = 256;
7201 else
7202 range = 4096;
7204 else
7205 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7207 return (code == CONST_INT
7208 && INTVAL (index) < range
7209 && INTVAL (index) > -range);
7212 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7213 index operand. i.e. 1, 2, 4 or 8. */
7214 static bool
7215 thumb2_index_mul_operand (rtx op)
7217 HOST_WIDE_INT val;
7219 if (!CONST_INT_P (op))
7220 return false;
7222 val = INTVAL(op);
7223 return (val == 1 || val == 2 || val == 4 || val == 8);
7226 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7227 static int
7228 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7230 enum rtx_code code = GET_CODE (index);
7232 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7233 /* Standard coprocessor addressing modes. */
7234 if (TARGET_HARD_FLOAT
7235 && TARGET_VFP
7236 && (mode == SFmode || mode == DFmode))
7237 return (code == CONST_INT && INTVAL (index) < 1024
7238 /* Thumb-2 allows only > -256 index range for its core register
7239 load/stores. Since we allow SF/DF in core registers, we have
7240 to use the intersection between -256~4096 (core) and -1024~1024
7241 (coprocessor). */
7242 && INTVAL (index) > -256
7243 && (INTVAL (index) & 3) == 0);
7245 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7247 /* For DImode assume values will usually live in core regs
7248 and only allow LDRD addressing modes. */
7249 if (!TARGET_LDRD || mode != DImode)
7250 return (code == CONST_INT
7251 && INTVAL (index) < 1024
7252 && INTVAL (index) > -1024
7253 && (INTVAL (index) & 3) == 0);
7256 /* For quad modes, we restrict the constant offset to be slightly less
7257 than what the instruction format permits. We do this because for
7258 quad mode moves, we will actually decompose them into two separate
7259 double-mode reads or writes. INDEX must therefore be a valid
7260 (double-mode) offset and so should INDEX+8. */
7261 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7262 return (code == CONST_INT
7263 && INTVAL (index) < 1016
7264 && INTVAL (index) > -1024
7265 && (INTVAL (index) & 3) == 0);
7267 /* We have no such constraint on double mode offsets, so we permit the
7268 full range of the instruction format. */
7269 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7270 return (code == CONST_INT
7271 && INTVAL (index) < 1024
7272 && INTVAL (index) > -1024
7273 && (INTVAL (index) & 3) == 0);
7275 if (arm_address_register_rtx_p (index, strict_p)
7276 && (GET_MODE_SIZE (mode) <= 4))
7277 return 1;
7279 if (mode == DImode || mode == DFmode)
7281 if (code == CONST_INT)
7283 HOST_WIDE_INT val = INTVAL (index);
7284 /* ??? Can we assume ldrd for thumb2? */
7285 /* Thumb-2 ldrd only has reg+const addressing modes. */
7286 /* ldrd supports offsets of +-1020.
7287 However the ldr fallback does not. */
7288 return val > -256 && val < 256 && (val & 3) == 0;
7290 else
7291 return 0;
7294 if (code == MULT)
7296 rtx xiop0 = XEXP (index, 0);
7297 rtx xiop1 = XEXP (index, 1);
7299 return ((arm_address_register_rtx_p (xiop0, strict_p)
7300 && thumb2_index_mul_operand (xiop1))
7301 || (arm_address_register_rtx_p (xiop1, strict_p)
7302 && thumb2_index_mul_operand (xiop0)));
7304 else if (code == ASHIFT)
7306 rtx op = XEXP (index, 1);
7308 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7309 && CONST_INT_P (op)
7310 && INTVAL (op) > 0
7311 && INTVAL (op) <= 3);
7314 return (code == CONST_INT
7315 && INTVAL (index) < 4096
7316 && INTVAL (index) > -256);
7319 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7320 static int
7321 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7323 int regno;
7325 if (!REG_P (x))
7326 return 0;
7328 regno = REGNO (x);
7330 if (strict_p)
7331 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7333 return (regno <= LAST_LO_REGNUM
7334 || regno > LAST_VIRTUAL_REGISTER
7335 || regno == FRAME_POINTER_REGNUM
7336 || (GET_MODE_SIZE (mode) >= 4
7337 && (regno == STACK_POINTER_REGNUM
7338 || regno >= FIRST_PSEUDO_REGISTER
7339 || x == hard_frame_pointer_rtx
7340 || x == arg_pointer_rtx)));
7343 /* Return nonzero if x is a legitimate index register. This is the case
7344 for any base register that can access a QImode object. */
7345 inline static int
7346 thumb1_index_register_rtx_p (rtx x, int strict_p)
7348 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7351 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7353 The AP may be eliminated to either the SP or the FP, so we use the
7354 least common denominator, e.g. SImode, and offsets from 0 to 64.
7356 ??? Verify whether the above is the right approach.
7358 ??? Also, the FP may be eliminated to the SP, so perhaps that
7359 needs special handling also.
7361 ??? Look at how the mips16 port solves this problem. It probably uses
7362 better ways to solve some of these problems.
7364 Although it is not incorrect, we don't accept QImode and HImode
7365 addresses based on the frame pointer or arg pointer until the
7366 reload pass starts. This is so that eliminating such addresses
7367 into stack based ones won't produce impossible code. */
7369 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7371 /* ??? Not clear if this is right. Experiment. */
7372 if (GET_MODE_SIZE (mode) < 4
7373 && !(reload_in_progress || reload_completed)
7374 && (reg_mentioned_p (frame_pointer_rtx, x)
7375 || reg_mentioned_p (arg_pointer_rtx, x)
7376 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7377 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7378 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7379 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7380 return 0;
7382 /* Accept any base register. SP only in SImode or larger. */
7383 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7384 return 1;
7386 /* This is PC relative data before arm_reorg runs. */
7387 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7388 && GET_CODE (x) == SYMBOL_REF
7389 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7390 return 1;
7392 /* This is PC relative data after arm_reorg runs. */
7393 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7394 && reload_completed
7395 && (GET_CODE (x) == LABEL_REF
7396 || (GET_CODE (x) == CONST
7397 && GET_CODE (XEXP (x, 0)) == PLUS
7398 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7399 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7400 return 1;
7402 /* Post-inc indexing only supported for SImode and larger. */
7403 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7404 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7405 return 1;
7407 else if (GET_CODE (x) == PLUS)
7409 /* REG+REG address can be any two index registers. */
7410 /* We disallow FRAME+REG addressing since we know that FRAME
7411 will be replaced with STACK, and SP relative addressing only
7412 permits SP+OFFSET. */
7413 if (GET_MODE_SIZE (mode) <= 4
7414 && XEXP (x, 0) != frame_pointer_rtx
7415 && XEXP (x, 1) != frame_pointer_rtx
7416 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7417 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7418 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7419 return 1;
7421 /* REG+const has 5-7 bit offset for non-SP registers. */
7422 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7423 || XEXP (x, 0) == arg_pointer_rtx)
7424 && CONST_INT_P (XEXP (x, 1))
7425 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7426 return 1;
7428 /* REG+const has a 10-bit offset for SP, but only SImode and
7429 larger are supported. */
7430 /* ??? Should probably check for DI/DFmode overflow here
7431 just like GO_IF_LEGITIMATE_OFFSET does. */
7432 else if (REG_P (XEXP (x, 0))
7433 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7434 && GET_MODE_SIZE (mode) >= 4
7435 && CONST_INT_P (XEXP (x, 1))
7436 && INTVAL (XEXP (x, 1)) >= 0
7437 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7438 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7439 return 1;
7441 else if (REG_P (XEXP (x, 0))
7442 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7443 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7444 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7445 && REGNO (XEXP (x, 0))
7446 <= LAST_VIRTUAL_POINTER_REGISTER))
7447 && GET_MODE_SIZE (mode) >= 4
7448 && CONST_INT_P (XEXP (x, 1))
7449 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7450 return 1;
7453 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7454 && GET_MODE_SIZE (mode) == 4
7455 && GET_CODE (x) == SYMBOL_REF
7456 && CONSTANT_POOL_ADDRESS_P (x)
7457 && ! (flag_pic
7458 && symbol_mentioned_p (get_pool_constant (x))
7459 && ! pcrel_constant_p (get_pool_constant (x))))
7460 return 1;
7462 return 0;
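/* Illustrative sketch (not from the original sources) of typical SImode
   addresses the predicate above accepts; the register numbers are
   arbitrary examples.
     (reg r3)                          -- any base register
     (plus (reg r2) (reg r5))          -- REG+REG, accesses of 4 bytes or less
     (plus (reg r4) (const_int 20))    -- REG+imm, within thumb_legitimate_offset_p
     (plus (reg sp) (const_int 1020))  -- SP-relative, 10-bit word-aligned offset
   FRAME+REG forms are rejected, since frame-pointer elimination would
   otherwise create an unsupported SP+REG address.  */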
7465 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7466 instruction of mode MODE. */
7468 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7470 switch (GET_MODE_SIZE (mode))
7472 case 1:
7473 return val >= 0 && val < 32;
7475 case 2:
7476 return val >= 0 && val < 64 && (val & 1) == 0;
7478 default:
7479 return (val >= 0
7480 && (val + GET_MODE_SIZE (mode)) <= 128
7481 && (val & 3) == 0);
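/* A concrete reading of the ranges above (illustrative only; the asserts
   are not compiled).  */
#if 0
  gcc_assert (thumb_legitimate_offset_p (QImode, 31));    /* byte: 0..31 */
  gcc_assert (!thumb_legitimate_offset_p (QImode, 32));
  gcc_assert (thumb_legitimate_offset_p (HImode, 62));    /* half: 0..62, even */
  gcc_assert (!thumb_legitimate_offset_p (HImode, 63));
  gcc_assert (thumb_legitimate_offset_p (SImode, 124));   /* word: 0..124, multiple of 4 */
  gcc_assert (!thumb_legitimate_offset_p (SImode, 128));
#endif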
7485 bool
7486 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7488 if (TARGET_ARM)
7489 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7490 else if (TARGET_THUMB2)
7491 return thumb2_legitimate_address_p (mode, x, strict_p);
7492 else /* if (TARGET_THUMB1) */
7493 return thumb1_legitimate_address_p (mode, x, strict_p);
7496 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7498 Given an rtx X being reloaded into a reg required to be
7499 in class CLASS, return the class of reg to actually use.
7500 In general this is just CLASS, but for the Thumb core registers and
7501 immediate constants we prefer a LO_REGS class or a subset. */
7503 static reg_class_t
7504 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7506 if (TARGET_32BIT)
7507 return rclass;
7508 else
7510 if (rclass == GENERAL_REGS)
7511 return LO_REGS;
7512 else
7513 return rclass;
7517 /* Build the SYMBOL_REF for __tls_get_addr. */
7519 static GTY(()) rtx tls_get_addr_libfunc;
7521 static rtx
7522 get_tls_get_addr (void)
7524 if (!tls_get_addr_libfunc)
7525 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7526 return tls_get_addr_libfunc;
7530 arm_load_tp (rtx target)
7532 if (!target)
7533 target = gen_reg_rtx (SImode);
7535 if (TARGET_HARD_TP)
7537 /* Can return in any reg. */
7538 emit_insn (gen_load_tp_hard (target));
7540 else
7542 /* Always returned in r0. Immediately copy the result into a pseudo;
7543 otherwise other uses of r0 (e.g. setting up function arguments) may
7544 clobber the value. */
7546 rtx tmp;
7548 emit_insn (gen_load_tp_soft ());
7550 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7551 emit_move_insn (target, tmp);
7553 return target;
7556 static rtx
7557 load_tls_operand (rtx x, rtx reg)
7559 rtx tmp;
7561 if (reg == NULL_RTX)
7562 reg = gen_reg_rtx (SImode);
7564 tmp = gen_rtx_CONST (SImode, x);
7566 emit_move_insn (reg, tmp);
7568 return reg;
7571 static rtx
7572 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7574 rtx insns, label, labelno, sum;
7576 gcc_assert (reloc != TLS_DESCSEQ);
7577 start_sequence ();
7579 labelno = GEN_INT (pic_labelno++);
7580 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7581 label = gen_rtx_CONST (VOIDmode, label);
7583 sum = gen_rtx_UNSPEC (Pmode,
7584 gen_rtvec (4, x, GEN_INT (reloc), label,
7585 GEN_INT (TARGET_ARM ? 8 : 4)),
7586 UNSPEC_TLS);
7587 reg = load_tls_operand (sum, reg);
7589 if (TARGET_ARM)
7590 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7591 else
7592 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7594 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7595 LCT_PURE, /* LCT_CONST? */
7596 Pmode, 1, reg, Pmode);
7598 insns = get_insns ();
7599 end_sequence ();
7601 return insns;
7604 static rtx
7605 arm_tls_descseq_addr (rtx x, rtx reg)
7607 rtx labelno = GEN_INT (pic_labelno++);
7608 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7609 rtx sum = gen_rtx_UNSPEC (Pmode,
7610 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7611 gen_rtx_CONST (VOIDmode, label),
7612 GEN_INT (!TARGET_ARM)),
7613 UNSPEC_TLS);
7614 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7616 emit_insn (gen_tlscall (x, labelno));
7617 if (!reg)
7618 reg = gen_reg_rtx (SImode);
7619 else
7620 gcc_assert (REGNO (reg) != R0_REGNUM);
7622 emit_move_insn (reg, reg0);
7624 return reg;
7628 legitimize_tls_address (rtx x, rtx reg)
7630 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7631 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7633 switch (model)
7635 case TLS_MODEL_GLOBAL_DYNAMIC:
7636 if (TARGET_GNU2_TLS)
7638 reg = arm_tls_descseq_addr (x, reg);
7640 tp = arm_load_tp (NULL_RTX);
7642 dest = gen_rtx_PLUS (Pmode, tp, reg);
7644 else
7646 /* Original scheme */
7647 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7648 dest = gen_reg_rtx (Pmode);
7649 emit_libcall_block (insns, dest, ret, x);
7651 return dest;
7653 case TLS_MODEL_LOCAL_DYNAMIC:
7654 if (TARGET_GNU2_TLS)
7656 reg = arm_tls_descseq_addr (x, reg);
7658 tp = arm_load_tp (NULL_RTX);
7660 dest = gen_rtx_PLUS (Pmode, tp, reg);
7662 else
7664 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7666 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7667 share the LDM result with other LD model accesses. */
7668 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7669 UNSPEC_TLS);
7670 dest = gen_reg_rtx (Pmode);
7671 emit_libcall_block (insns, dest, ret, eqv);
7673 /* Load the addend. */
7674 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7675 GEN_INT (TLS_LDO32)),
7676 UNSPEC_TLS);
7677 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7678 dest = gen_rtx_PLUS (Pmode, dest, addend);
7680 return dest;
7682 case TLS_MODEL_INITIAL_EXEC:
7683 labelno = GEN_INT (pic_labelno++);
7684 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7685 label = gen_rtx_CONST (VOIDmode, label);
7686 sum = gen_rtx_UNSPEC (Pmode,
7687 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7688 GEN_INT (TARGET_ARM ? 8 : 4)),
7689 UNSPEC_TLS);
7690 reg = load_tls_operand (sum, reg);
7692 if (TARGET_ARM)
7693 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7694 else if (TARGET_THUMB2)
7695 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7696 else
7698 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7699 emit_move_insn (reg, gen_const_mem (SImode, reg));
7702 tp = arm_load_tp (NULL_RTX);
7704 return gen_rtx_PLUS (Pmode, tp, reg);
7706 case TLS_MODEL_LOCAL_EXEC:
7707 tp = arm_load_tp (NULL_RTX);
7709 reg = gen_rtx_UNSPEC (Pmode,
7710 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7711 UNSPEC_TLS);
7712 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7714 return gen_rtx_PLUS (Pmode, tp, reg);
7716 default:
7717 abort ();
7721 /* Try machine-dependent ways of modifying an illegitimate address
7722 to be legitimate. If we find one, return the new, valid address. */
7724 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7726 if (arm_tls_referenced_p (x))
7728 rtx addend = NULL;
7730 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7732 addend = XEXP (XEXP (x, 0), 1);
7733 x = XEXP (XEXP (x, 0), 0);
7736 if (GET_CODE (x) != SYMBOL_REF)
7737 return x;
7739 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7741 x = legitimize_tls_address (x, NULL_RTX);
7743 if (addend)
7745 x = gen_rtx_PLUS (SImode, x, addend);
7746 orig_x = x;
7748 else
7749 return x;
7752 if (!TARGET_ARM)
7754 /* TODO: legitimize_address for Thumb2. */
7755 if (TARGET_THUMB2)
7756 return x;
7757 return thumb_legitimize_address (x, orig_x, mode);
7760 if (GET_CODE (x) == PLUS)
7762 rtx xop0 = XEXP (x, 0);
7763 rtx xop1 = XEXP (x, 1);
7765 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7766 xop0 = force_reg (SImode, xop0);
7768 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7769 && !symbol_mentioned_p (xop1))
7770 xop1 = force_reg (SImode, xop1);
7772 if (ARM_BASE_REGISTER_RTX_P (xop0)
7773 && CONST_INT_P (xop1))
7775 HOST_WIDE_INT n, low_n;
7776 rtx base_reg, val;
7777 n = INTVAL (xop1);
7779 /* VFP addressing modes actually allow greater offsets, but for
7780 now we just stick with the lowest common denominator. */
7781 if (mode == DImode
7782 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7784 low_n = n & 0x0f;
7785 n &= ~0x0f;
7786 if (low_n > 4)
7788 n += 16;
7789 low_n -= 16;
7792 else
7794 low_n = ((mode) == TImode ? 0
7795 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7796 n -= low_n;
7799 base_reg = gen_reg_rtx (SImode);
7800 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7801 emit_move_insn (base_reg, val);
7802 x = plus_constant (Pmode, base_reg, low_n);
7804 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7805 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 /* XXX We don't allow MINUS any more -- see comment in
7809 arm_legitimate_address_outer_p (). */
7810 else if (GET_CODE (x) == MINUS)
7812 rtx xop0 = XEXP (x, 0);
7813 rtx xop1 = XEXP (x, 1);
7815 if (CONSTANT_P (xop0))
7816 xop0 = force_reg (SImode, xop0);
7818 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7819 xop1 = force_reg (SImode, xop1);
7821 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7822 x = gen_rtx_MINUS (SImode, xop0, xop1);
7825 /* Make sure to take full advantage of the pre-indexed addressing mode
7826 with absolute addresses, which often allows the base register to
7827 be factorized for multiple adjacent memory references, and it might
7828 even allow the mini pool to be avoided entirely. */
7829 else if (CONST_INT_P (x) && optimize > 0)
7831 unsigned int bits;
7832 HOST_WIDE_INT mask, base, index;
7833 rtx base_reg;
7835 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7836 use an 8-bit index. So let's use a 12-bit index for SImode only and
7837 hope that arm_gen_constant will enable ldrb to use more bits. */
7838 bits = (mode == SImode) ? 12 : 8;
7839 mask = (1 << bits) - 1;
7840 base = INTVAL (x) & ~mask;
7841 index = INTVAL (x) & mask;
7842 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7844 /* It'll most probably be more efficient to generate the base
7845 with more bits set and use a negative index instead. */
7846 base |= mask;
7847 index -= mask;
7849 base_reg = force_reg (SImode, GEN_INT (base));
7850 x = plus_constant (Pmode, base_reg, index);
7853 if (flag_pic)
7855 /* We need to find and carefully transform any SYMBOL and LABEL
7856 references; so go back to the original address expression. */
7857 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7859 if (new_x != orig_x)
7860 x = new_x;
7863 return x;
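/* A sketch (illustrative values, not from the original sources) of the
   absolute-address splitting above for a QImode access, mirroring the
   bits/mask/base/index computation in the CONST_INT branch.  */
#if 0
  unsigned int bits = 8;                        /* QImode: 8-bit index */
  HOST_WIDE_INT mask = (1 << bits) - 1;         /* 0xff */
  HOST_WIDE_INT base = 0x00fffff4 & ~mask;      /* 0x00ffff00 */
  HOST_WIDE_INT index = 0x00fffff4 & mask;      /* 0xf4 */
  /* bit_count (base) == 16 > (32 - bits) / 2 == 12, so flip to a
     negative index: base becomes 0x00ffffff (a single MVN) and
     index becomes 0xf4 - 0xff == -11.  */
  base |= mask;
  index -= mask;
#endif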
7867 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7868 to be legitimate. If we find one, return the new, valid address. */
7870 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7872 if (GET_CODE (x) == PLUS
7873 && CONST_INT_P (XEXP (x, 1))
7874 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7875 || INTVAL (XEXP (x, 1)) < 0))
7877 rtx xop0 = XEXP (x, 0);
7878 rtx xop1 = XEXP (x, 1);
7879 HOST_WIDE_INT offset = INTVAL (xop1);
7881 /* Try and fold the offset into a biasing of the base register and
7882 then offsetting that. Don't do this when optimizing for space
7883 since it can cause too many CSEs. */
7884 if (optimize_size && offset >= 0
7885 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7887 HOST_WIDE_INT delta;
7889 if (offset >= 256)
7890 delta = offset - (256 - GET_MODE_SIZE (mode));
7891 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7892 delta = 31 * GET_MODE_SIZE (mode);
7893 else
7894 delta = offset & (~31 * GET_MODE_SIZE (mode));
7896 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7897 NULL_RTX);
7898 x = plus_constant (Pmode, xop0, delta);
7900 else if (offset < 0 && offset > -256)
7901 /* Small negative offsets are best done with a subtract before the
7902 dereference; forcing these into a register normally takes two
7903 instructions. */
7904 x = force_operand (x, NULL_RTX);
7905 else
7907 /* For the remaining cases, force the constant into a register. */
7908 xop1 = force_reg (SImode, xop1);
7909 x = gen_rtx_PLUS (SImode, xop0, xop1);
7912 else if (GET_CODE (x) == PLUS
7913 && s_register_operand (XEXP (x, 1), SImode)
7914 && !s_register_operand (XEXP (x, 0), SImode))
7916 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7918 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7921 if (flag_pic)
7923 /* We need to find and carefully transform any SYMBOL and LABEL
7924 references; so go back to the original address expression. */
7925 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7927 if (new_x != orig_x)
7928 x = new_x;
7931 return x;
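/* A sketch (assumed values, not from the original sources) of the offset
   folding above for an SImode access at base + 300 when optimizing for
   size: 300 is too large for the scaled REG+imm form, so the code biases
   the base register instead.  */
#if 0
  HOST_WIDE_INT offset = 300;
  HOST_WIDE_INT delta = offset - (256 - 4);   /* 48: a legal word offset */
  /* xop0 becomes base + 252 (one add), and the final address is
     (base + 252) + 48, which thumb_legitimate_offset_p accepts.  */
#endif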
7934 bool
7935 arm_legitimize_reload_address (rtx *p,
7936 machine_mode mode,
7937 int opnum, int type,
7938 int ind_levels ATTRIBUTE_UNUSED)
7940 /* We must recognize output that we have already generated ourselves. */
7941 if (GET_CODE (*p) == PLUS
7942 && GET_CODE (XEXP (*p, 0)) == PLUS
7943 && REG_P (XEXP (XEXP (*p, 0), 0))
7944 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7945 && CONST_INT_P (XEXP (*p, 1)))
7947 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7948 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7949 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7950 return true;
7953 if (GET_CODE (*p) == PLUS
7954 && REG_P (XEXP (*p, 0))
7955 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7956 /* If the base register is equivalent to a constant, let the generic
7957 code handle it. Otherwise we will run into problems if a future
7958 reload pass decides to rematerialize the constant. */
7959 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7960 && CONST_INT_P (XEXP (*p, 1)))
7962 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7963 HOST_WIDE_INT low, high;
7965 /* Detect coprocessor load/stores. */
7966 bool coproc_p = ((TARGET_HARD_FLOAT
7967 && TARGET_VFP
7968 && (mode == SFmode || mode == DFmode))
7969 || (TARGET_REALLY_IWMMXT
7970 && VALID_IWMMXT_REG_MODE (mode))
7971 || (TARGET_NEON
7972 && (VALID_NEON_DREG_MODE (mode)
7973 || VALID_NEON_QREG_MODE (mode))));
7975 /* For some conditions, bail out when lower two bits are unaligned. */
7976 if ((val & 0x3) != 0
7977 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7978 && (coproc_p
7979 /* For DI, and DF under soft-float: */
7980 || ((mode == DImode || mode == DFmode)
7981 /* Without ldrd, we use stm/ldm, which does not
7982 fare well with unaligned bits. */
7983 && (! TARGET_LDRD
7984 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7985 || TARGET_THUMB2))))
7986 return false;
7988 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7989 where the (reg+high) part gets turned into a reload add insn,
7990 we try to decompose the index into high/low values that can often
7991 also lead to better reload CSE.
7992 For example:
7993 ldr r0, [r2, #4100] // Offset too large
7994 ldr r1, [r2, #4104] // Offset too large
7996 is best reloaded as:
7997 add t1, r2, #4096
7998 ldr r0, [t1, #4]
7999 add t2, r2, #4096
8000 ldr r1, [t2, #8]
8002 which post-reload CSE can simplify in most cases to eliminate the
8003 second add instruction:
8004 add t1, r2, #4096
8005 ldr r0, [t1, #4]
8006 ldr r1, [t1, #8]
8008 The idea here is that we want to split out the bits of the constant
8009 as a mask, rather than subtracting the maximum offset that the
8010 respective type of load/store can handle.
8012 A negative low part can still be used even when the overall offset
8013 is positive; sometimes this may lead to an immediate
8014 that can be constructed with fewer instructions.
8015 For example:
8016 ldr r0, [r2, #0x3FFFFC]
8018 This is best reloaded as:
8019 add t1, r2, #0x400000
8020 ldr r0, [t1, #-4]
8022 The trick for spotting this for a load insn with N bits of offset
8023 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
8024 negative offset that is going to make bit N and all the bits below
8025 it become zero in the remainder part.
8027 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
8028 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
8029 used in most cases of ARM load/store instructions. */
8031 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
8032 (((VAL) & ((1 << (N)) - 1)) \
8033 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
8034 : 0)
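/* Worked examples (illustrative only) of the split performed by the macro
   above; the values follow directly from its definition.  */
#if 0
  /* Bit 12 clear: keep a small positive low part.  */
  gcc_assert (SIGN_MAG_LOW_ADDR_BITS (0x2004, 12) == 4);    /* high = 0x2000 */
  /* Bit 10 set (the 0x3FFFFC case from the comment above): choose a
     negative low part so that the high part is a single set bit.  */
  gcc_assert (SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10) == -4); /* high = 0x400000 */
#endif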
8036 if (coproc_p)
8038 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
8040 /* NEON quad-word load/stores are made of two double-word accesses,
8041 so the valid index range is reduced by 8. Treat it as a 9-bit range
8042 if we go over. */
8043 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
8044 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
8046 else if (GET_MODE_SIZE (mode) == 8)
8048 if (TARGET_LDRD)
8049 low = (TARGET_THUMB2
8050 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
8051 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
8052 else
8053 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
8054 to access doublewords. The supported load/store offsets are
8055 -8, -4, and 4, which we try to produce here. */
8056 low = ((val & 0xf) ^ 0x8) - 0x8;
8058 else if (GET_MODE_SIZE (mode) < 8)
8060 /* NEON element load/stores do not have an offset. */
8061 if (TARGET_NEON_FP16 && mode == HFmode)
8062 return false;
8064 if (TARGET_THUMB2)
8066 /* Thumb-2 has an asymmetrical index range of (-256,4096).
8067 Try the wider 12-bit range first, and re-try if the result
8068 is out of range. */
8069 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8070 if (low < -255)
8071 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8073 else
8075 if (mode == HImode || mode == HFmode)
8077 if (arm_arch4)
8078 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
8079 else
8081 /* The storehi/movhi_bytes fallbacks can use only
8082 [-4094,+4094] of the full ldrb/strb index range. */
8083 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8084 if (low == 4095 || low == -4095)
8085 return false;
8088 else
8089 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
8092 else
8093 return false;
8095 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
8096 ^ (unsigned HOST_WIDE_INT) 0x80000000)
8097 - (unsigned HOST_WIDE_INT) 0x80000000);
8098 /* Check for overflow or zero. */
8099 if (low == 0 || high == 0 || (high + low != val))
8100 return false;
8102 /* Reload the high part into a base reg; leave the low part
8103 in the mem.
8104 Note that replacing this gen_rtx_PLUS with plus_constant is
8105 wrong in this case because we rely on the
8106 (plus (plus reg c1) c2) structure being preserved so that
8107 XEXP (*p, 0) in push_reload below uses the correct term. */
8108 *p = gen_rtx_PLUS (GET_MODE (*p),
8109 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8110 GEN_INT (high)),
8111 GEN_INT (low));
8112 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8113 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8114 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8115 return true;
8118 return false;
8122 thumb_legitimize_reload_address (rtx *x_p,
8123 machine_mode mode,
8124 int opnum, int type,
8125 int ind_levels ATTRIBUTE_UNUSED)
8127 rtx x = *x_p;
8129 if (GET_CODE (x) == PLUS
8130 && GET_MODE_SIZE (mode) < 4
8131 && REG_P (XEXP (x, 0))
8132 && XEXP (x, 0) == stack_pointer_rtx
8133 && CONST_INT_P (XEXP (x, 1))
8134 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8136 rtx orig_x = x;
8138 x = copy_rtx (x);
8139 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8140 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8141 return x;
8144 /* If both registers are hi-regs, then it's better to reload the
8145 entire expression rather than each register individually. That
8146 only requires one reload register rather than two. */
8147 if (GET_CODE (x) == PLUS
8148 && REG_P (XEXP (x, 0))
8149 && REG_P (XEXP (x, 1))
8150 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8151 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8153 rtx orig_x = x;
8155 x = copy_rtx (x);
8156 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8157 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8158 return x;
8161 return NULL;
8164 /* Return TRUE if X contains any TLS symbol references. */
8166 bool
8167 arm_tls_referenced_p (rtx x)
8169 if (! TARGET_HAVE_TLS)
8170 return false;
8172 subrtx_iterator::array_type array;
8173 FOR_EACH_SUBRTX (iter, array, x, ALL)
8175 const_rtx x = *iter;
8176 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8177 return true;
8179 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8180 TLS offsets, not real symbol references. */
8181 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8182 iter.skip_subrtxes ();
8184 return false;
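/* Illustration (not from the original sources): the walk above returns
   true for an address such as
     (const (plus (symbol_ref "x") (const_int 4)))
   when "x" has a non-zero SYMBOL_REF_TLS_MODEL, but it skips over
   (unspec [...] UNSPEC_TLS) operands, since those carry TLS offsets
   rather than real symbol references.  */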
8187 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8189 On the ARM, allow any integer (invalid ones are removed later by insn
8190 patterns), nice doubles and symbol_refs which refer to the function's
8191 constant pool XXX.
8193 When generating PIC, allow anything. */
8195 static bool
8196 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8198 return flag_pic || !label_mentioned_p (x);
8201 static bool
8202 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8204 return (CONST_INT_P (x)
8205 || CONST_DOUBLE_P (x)
8206 || CONSTANT_ADDRESS_P (x)
8207 || flag_pic);
8210 static bool
8211 arm_legitimate_constant_p (machine_mode mode, rtx x)
8213 return (!arm_cannot_force_const_mem (mode, x)
8214 && (TARGET_32BIT
8215 ? arm_legitimate_constant_p_1 (mode, x)
8216 : thumb_legitimate_constant_p (mode, x)));
8219 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8221 static bool
8222 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8224 rtx base, offset;
8226 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8228 split_const (x, &base, &offset);
8229 if (GET_CODE (base) == SYMBOL_REF
8230 && !offset_within_block_p (base, INTVAL (offset)))
8231 return true;
8233 return arm_tls_referenced_p (x);
8236 #define REG_OR_SUBREG_REG(X) \
8237 (REG_P (X) \
8238 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8240 #define REG_OR_SUBREG_RTX(X) \
8241 (REG_P (X) ? (X) : SUBREG_REG (X))
8243 static inline int
8244 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8246 machine_mode mode = GET_MODE (x);
8247 int total, words;
8249 switch (code)
8251 case ASHIFT:
8252 case ASHIFTRT:
8253 case LSHIFTRT:
8254 case ROTATERT:
8255 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8257 case PLUS:
8258 case MINUS:
8259 case COMPARE:
8260 case NEG:
8261 case NOT:
8262 return COSTS_N_INSNS (1);
8264 case MULT:
8265 if (CONST_INT_P (XEXP (x, 1)))
8267 int cycles = 0;
8268 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8270 while (i)
8272 i >>= 2;
8273 cycles++;
8275 return COSTS_N_INSNS (2) + cycles;
8277 return COSTS_N_INSNS (1) + 16;
8279 case SET:
8280 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8281 the mode. */
8282 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8283 return (COSTS_N_INSNS (words)
8284 + 4 * ((MEM_P (SET_SRC (x)))
8285 + MEM_P (SET_DEST (x))));
8287 case CONST_INT:
8288 if (outer == SET)
8290 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8291 return 0;
8292 if (thumb_shiftable_const (INTVAL (x)))
8293 return COSTS_N_INSNS (2);
8294 return COSTS_N_INSNS (3);
8296 else if ((outer == PLUS || outer == COMPARE)
8297 && INTVAL (x) < 256 && INTVAL (x) > -256)
8298 return 0;
8299 else if ((outer == IOR || outer == XOR || outer == AND)
8300 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8301 return COSTS_N_INSNS (1);
8302 else if (outer == AND)
8304 int i;
8305 /* This duplicates the tests in the andsi3 expander. */
8306 for (i = 9; i <= 31; i++)
8307 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8308 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8309 return COSTS_N_INSNS (2);
8311 else if (outer == ASHIFT || outer == ASHIFTRT
8312 || outer == LSHIFTRT)
8313 return 0;
8314 return COSTS_N_INSNS (2);
8316 case CONST:
8317 case CONST_DOUBLE:
8318 case LABEL_REF:
8319 case SYMBOL_REF:
8320 return COSTS_N_INSNS (3);
8322 case UDIV:
8323 case UMOD:
8324 case DIV:
8325 case MOD:
8326 return 100;
8328 case TRUNCATE:
8329 return 99;
8331 case AND:
8332 case XOR:
8333 case IOR:
8334 /* XXX guess. */
8335 return 8;
8337 case MEM:
8338 /* XXX another guess. */
8339 /* Memory costs quite a lot for the first word, but subsequent words
8340 load at the equivalent of a single insn each. */
8341 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8342 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8343 ? 4 : 0));
8345 case IF_THEN_ELSE:
8346 /* XXX a guess. */
8347 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8348 return 14;
8349 return 2;
8351 case SIGN_EXTEND:
8352 case ZERO_EXTEND:
8353 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8354 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8356 if (mode == SImode)
8357 return total;
8359 if (arm_arch6)
8360 return total + COSTS_N_INSNS (1);
8362 /* Assume a two-shift sequence. Increase the cost slightly so
8363 we prefer actual shifts over an extend operation. */
8364 return total + 1 + COSTS_N_INSNS (2);
8366 default:
8367 return 99;
8371 static inline bool
8372 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8374 machine_mode mode = GET_MODE (x);
8375 enum rtx_code subcode;
8376 rtx operand;
8377 enum rtx_code code = GET_CODE (x);
8378 *total = 0;
8380 switch (code)
8382 case MEM:
8383 /* Memory costs quite a lot for the first word, but subsequent words
8384 load at the equivalent of a single insn each. */
8385 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8386 return true;
8388 case DIV:
8389 case MOD:
8390 case UDIV:
8391 case UMOD:
8392 if (TARGET_HARD_FLOAT && mode == SFmode)
8393 *total = COSTS_N_INSNS (2);
8394 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8395 *total = COSTS_N_INSNS (4);
8396 else
8397 *total = COSTS_N_INSNS (20);
8398 return false;
8400 case ROTATE:
8401 if (REG_P (XEXP (x, 1)))
8402 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8403 else if (!CONST_INT_P (XEXP (x, 1)))
8404 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8406 /* Fall through */
8407 case ROTATERT:
8408 if (mode != SImode)
8410 *total += COSTS_N_INSNS (4);
8411 return true;
8414 /* Fall through */
8415 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8416 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8417 if (mode == DImode)
8419 *total += COSTS_N_INSNS (3);
8420 return true;
8423 *total += COSTS_N_INSNS (1);
8424 /* Increase the cost of complex shifts because they aren't any faster,
8425 and they reduce dual-issue opportunities. */
8426 if (arm_tune_cortex_a9
8427 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8428 ++*total;
8430 return true;
8432 case MINUS:
8433 if (mode == DImode)
8435 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8436 if (CONST_INT_P (XEXP (x, 0))
8437 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8439 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8440 return true;
8443 if (CONST_INT_P (XEXP (x, 1))
8444 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8446 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8447 return true;
8450 return false;
8453 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8455 if (TARGET_HARD_FLOAT
8456 && (mode == SFmode
8457 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8459 *total = COSTS_N_INSNS (1);
8460 if (CONST_DOUBLE_P (XEXP (x, 0))
8461 && arm_const_double_rtx (XEXP (x, 0)))
8463 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8464 return true;
8467 if (CONST_DOUBLE_P (XEXP (x, 1))
8468 && arm_const_double_rtx (XEXP (x, 1)))
8470 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8471 return true;
8474 return false;
8476 *total = COSTS_N_INSNS (20);
8477 return false;
8480 *total = COSTS_N_INSNS (1);
8481 if (CONST_INT_P (XEXP (x, 0))
8482 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8484 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8485 return true;
8488 subcode = GET_CODE (XEXP (x, 1));
8489 if (subcode == ASHIFT || subcode == ASHIFTRT
8490 || subcode == LSHIFTRT
8491 || subcode == ROTATE || subcode == ROTATERT)
8493 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8494 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8495 return true;
8498 /* A shift as a part of RSB costs no more than RSB itself. */
8499 if (GET_CODE (XEXP (x, 0)) == MULT
8500 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8502 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8503 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8504 return true;
8507 if (subcode == MULT
8508 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8510 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8511 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8512 return true;
8515 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8516 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8518 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8519 if (REG_P (XEXP (XEXP (x, 1), 0))
8520 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8521 *total += COSTS_N_INSNS (1);
8523 return true;
8526 /* Fall through */
8528 case PLUS:
8529 if (code == PLUS && arm_arch6 && mode == SImode
8530 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8531 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8533 *total = COSTS_N_INSNS (1);
8534 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8535 0, speed);
8536 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8537 return true;
8540 /* MLA: All arguments must be registers. We filter out
8541 multiplication by a power of two, so that we fall through to
8542 the code below. */
8543 if (GET_CODE (XEXP (x, 0)) == MULT
8544 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8546 /* The cost comes from the cost of the multiply. */
8547 return false;
8550 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8552 if (TARGET_HARD_FLOAT
8553 && (mode == SFmode
8554 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8556 *total = COSTS_N_INSNS (1);
8557 if (CONST_DOUBLE_P (XEXP (x, 1))
8558 && arm_const_double_rtx (XEXP (x, 1)))
8560 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8561 return true;
8564 return false;
8567 *total = COSTS_N_INSNS (20);
8568 return false;
8571 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8572 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8574 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8575 if (REG_P (XEXP (XEXP (x, 0), 0))
8576 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8577 *total += COSTS_N_INSNS (1);
8578 return true;
8581 /* Fall through */
8583 case AND: case XOR: case IOR:
8585 /* Normally the frame registers will be split into reg+const during
8586 reload, so it is a bad idea to combine them with other instructions,
8587 since then they might not be moved outside of loops. As a compromise
8588 we allow integration with ops that have a constant as their second
8589 operand. */
8590 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8591 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8592 && !CONST_INT_P (XEXP (x, 1)))
8593 *total = COSTS_N_INSNS (1);
8595 if (mode == DImode)
8597 *total += COSTS_N_INSNS (2);
8598 if (CONST_INT_P (XEXP (x, 1))
8599 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8601 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8602 return true;
8605 return false;
8608 *total += COSTS_N_INSNS (1);
8609 if (CONST_INT_P (XEXP (x, 1))
8610 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8612 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8613 return true;
8615 subcode = GET_CODE (XEXP (x, 0));
8616 if (subcode == ASHIFT || subcode == ASHIFTRT
8617 || subcode == LSHIFTRT
8618 || subcode == ROTATE || subcode == ROTATERT)
8620 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8621 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8622 return true;
8625 if (subcode == MULT
8626 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8628 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8629 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8630 return true;
8633 if (subcode == UMIN || subcode == UMAX
8634 || subcode == SMIN || subcode == SMAX)
8636 *total = COSTS_N_INSNS (3);
8637 return true;
8640 return false;
8642 case MULT:
8643 /* This should have been handled by the CPU specific routines. */
8644 gcc_unreachable ();
8646 case TRUNCATE:
8647 if (arm_arch3m && mode == SImode
8648 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8649 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8650 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8651 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8652 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8653 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8655 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8656 return true;
8658 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8659 return false;
8661 case NEG:
8662 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8664 if (TARGET_HARD_FLOAT
8665 && (mode == SFmode
8666 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8668 *total = COSTS_N_INSNS (1);
8669 return false;
8671 *total = COSTS_N_INSNS (2);
8672 return false;
8675 /* Fall through */
8676 case NOT:
8677 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8678 if (mode == SImode && code == NOT)
8680 subcode = GET_CODE (XEXP (x, 0));
8681 if (subcode == ASHIFT || subcode == ASHIFTRT
8682 || subcode == LSHIFTRT
8683 || subcode == ROTATE || subcode == ROTATERT
8684 || (subcode == MULT
8685 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8687 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8688 /* Register shifts cost an extra cycle. */
8689 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8690 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8691 subcode, 1, speed);
8692 return true;
8696 return false;
8698 case IF_THEN_ELSE:
8699 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8701 *total = COSTS_N_INSNS (4);
8702 return true;
8705 operand = XEXP (x, 0);
8707 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8708 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8709 && REG_P (XEXP (operand, 0))
8710 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8711 *total += COSTS_N_INSNS (1);
8712 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8713 + rtx_cost (XEXP (x, 2), code, 2, speed));
8714 return true;
8716 case NE:
8717 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8719 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8720 return true;
8722 goto scc_insn;
8724 case GE:
8725 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8726 && mode == SImode && XEXP (x, 1) == const0_rtx)
8728 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8729 return true;
8731 goto scc_insn;
8733 case LT:
8734 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8735 && mode == SImode && XEXP (x, 1) == const0_rtx)
8737 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8738 return true;
8740 goto scc_insn;
8742 case EQ:
8743 case GT:
8744 case LE:
8745 case GEU:
8746 case LTU:
8747 case GTU:
8748 case LEU:
8749 case UNORDERED:
8750 case ORDERED:
8751 case UNEQ:
8752 case UNGE:
8753 case UNLT:
8754 case UNGT:
8755 case UNLE:
8756 scc_insn:
8757 /* SCC insns. If the comparison has already been
8758 performed, they cost 2 instructions. Otherwise they need
8759 an additional comparison before them. */
8760 *total = COSTS_N_INSNS (2);
8761 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8763 return true;
8766 /* Fall through */
8767 case COMPARE:
8768 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8770 *total = 0;
8771 return true;
8774 *total += COSTS_N_INSNS (1);
8775 if (CONST_INT_P (XEXP (x, 1))
8776 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8778 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8779 return true;
8782 subcode = GET_CODE (XEXP (x, 0));
8783 if (subcode == ASHIFT || subcode == ASHIFTRT
8784 || subcode == LSHIFTRT
8785 || subcode == ROTATE || subcode == ROTATERT)
8787 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8788 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8789 return true;
8792 if (subcode == MULT
8793 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8795 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8796 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8797 return true;
8800 return false;
8802 case UMIN:
8803 case UMAX:
8804 case SMIN:
8805 case SMAX:
8806 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8807 if (!CONST_INT_P (XEXP (x, 1))
8808 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8809 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8810 return true;
8812 case ABS:
8813 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8815 if (TARGET_HARD_FLOAT
8816 && (mode == SFmode
8817 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8819 *total = COSTS_N_INSNS (1);
8820 return false;
8822 *total = COSTS_N_INSNS (20);
8823 return false;
8825 *total = COSTS_N_INSNS (1);
8826 if (mode == DImode)
8827 *total += COSTS_N_INSNS (3);
8828 return false;
8830 case SIGN_EXTEND:
8831 case ZERO_EXTEND:
8832 *total = 0;
8833 if (GET_MODE_CLASS (mode) == MODE_INT)
8835 rtx op = XEXP (x, 0);
8836 machine_mode opmode = GET_MODE (op);
8838 if (mode == DImode)
8839 *total += COSTS_N_INSNS (1);
8841 if (opmode != SImode)
8843 if (MEM_P (op))
8845 /* If !arm_arch4, we use one of the extendhisi2_mem
8846 or movhi_bytes patterns for HImode. For a QImode
8847 sign extension, we first zero-extend from memory
8848 and then perform a shift sequence. */
8849 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8850 *total += COSTS_N_INSNS (2);
8852 else if (arm_arch6)
8853 *total += COSTS_N_INSNS (1);
8855 /* We don't have the necessary insn, so we need to perform some
8856 other operation. */
8857 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8858 /* An and with constant 255. */
8859 *total += COSTS_N_INSNS (1);
8860 else
8861 /* A shift sequence. Increase costs slightly to avoid
8862 combining two shifts into an extend operation. */
8863 *total += COSTS_N_INSNS (2) + 1;
8866 return false;
8869 switch (GET_MODE (XEXP (x, 0)))
8871 case V8QImode:
8872 case V4HImode:
8873 case V2SImode:
8874 case V4QImode:
8875 case V2HImode:
8876 *total = COSTS_N_INSNS (1);
8877 return false;
8879 default:
8880 gcc_unreachable ();
8882 gcc_unreachable ();
8884 case ZERO_EXTRACT:
8885 case SIGN_EXTRACT:
8886 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8887 return true;
8889 case CONST_INT:
8890 if (const_ok_for_arm (INTVAL (x))
8891 || const_ok_for_arm (~INTVAL (x)))
8892 *total = COSTS_N_INSNS (1);
8893 else
8894 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8895 INTVAL (x), NULL_RTX,
8896 NULL_RTX, 0, 0));
8897 return true;
8899 case CONST:
8900 case LABEL_REF:
8901 case SYMBOL_REF:
8902 *total = COSTS_N_INSNS (3);
8903 return true;
8905 case HIGH:
8906 *total = COSTS_N_INSNS (1);
8907 return true;
8909 case LO_SUM:
8910 *total = COSTS_N_INSNS (1);
8911 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8912 return true;
8914 case CONST_DOUBLE:
8915 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8916 && (mode == SFmode || !TARGET_VFP_SINGLE))
8917 *total = COSTS_N_INSNS (1);
8918 else
8919 *total = COSTS_N_INSNS (4);
8920 return true;
8922 case SET:
8923 /* The vec_extract patterns accept memory operands that require an
8924 address reload. Account for the cost of that reload to give the
8925 auto-inc-dec pass an incentive to try to replace them. */
8926 if (TARGET_NEON && MEM_P (SET_DEST (x))
8927 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8929 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8930 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8931 *total += COSTS_N_INSNS (1);
8932 return true;
8934 /* Likewise for the vec_set patterns. */
8935 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8936 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8937 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8939 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8940 *total = rtx_cost (mem, code, 0, speed);
8941 if (!neon_vector_mem_operand (mem, 2, true))
8942 *total += COSTS_N_INSNS (1);
8943 return true;
8945 return false;
8947 case UNSPEC:
8948 /* We cost this as highly as a memory access so that it can
8949 be hoisted out of loops. */
8950 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8952 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8954 return true;
8956 case CONST_VECTOR:
8957 if (TARGET_NEON
8958 && TARGET_HARD_FLOAT
8959 && outer == SET
8960 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8961 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8962 *total = COSTS_N_INSNS (1);
8963 else
8964 *total = COSTS_N_INSNS (4);
8965 return true;
8967 default:
8968 *total = COSTS_N_INSNS (4);
8969 return false;
8973 /* Estimates the size cost of thumb1 instructions.
8974 For now most of the code is copied from thumb1_rtx_costs. We need more
8975 fine-grained tuning when we have more related test cases. */
8976 static inline int
8977 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8979 machine_mode mode = GET_MODE (x);
8980 int words;
8982 switch (code)
8984 case ASHIFT:
8985 case ASHIFTRT:
8986 case LSHIFTRT:
8987 case ROTATERT:
8988 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8990 case PLUS:
8991 case MINUS:
8992 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8993 operations defined by RTL expansion, especially for the expansion of
8994 multiplication. */
8995 if ((GET_CODE (XEXP (x, 0)) == MULT
8996 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8997 || (GET_CODE (XEXP (x, 1)) == MULT
8998 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8999 return COSTS_N_INSNS (2);
9000 /* Deliberately fall through for a normal RTX. */
9001 case COMPARE:
9002 case NEG:
9003 case NOT:
9004 return COSTS_N_INSNS (1);
9006 case MULT:
9007 if (CONST_INT_P (XEXP (x, 1)))
9009 /* The Thumb-1 mul instruction can't operate on a constant; we must
9010 load it into a register first. */
9011 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9012 /* For targets that have a very small and high-latency multiply
9013 unit, we prefer to synthesize the mult with up to 5 instructions,
9014 giving a good balance between size and performance. */
9015 if (arm_arch6m && arm_m_profile_small_mul)
9016 return COSTS_N_INSNS (5);
9017 else
9018 return COSTS_N_INSNS (1) + const_size;
9020 return COSTS_N_INSNS (1);
9022 case SET:
9023 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9024 the mode. */
9025 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9026 return COSTS_N_INSNS (words)
9027 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
9028 || satisfies_constraint_K (SET_SRC (x))
9029 /* thumb1_movdi_insn. */
9030 || ((words > 1) && MEM_P (SET_SRC (x))));
9032 case CONST_INT:
9033 if (outer == SET)
9035 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9036 return COSTS_N_INSNS (1);
9037 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9038 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9039 return COSTS_N_INSNS (2);
9040 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9041 if (thumb_shiftable_const (INTVAL (x)))
9042 return COSTS_N_INSNS (2);
9043 return COSTS_N_INSNS (3);
9045 else if ((outer == PLUS || outer == COMPARE)
9046 && INTVAL (x) < 256 && INTVAL (x) > -256)
9047 return 0;
9048 else if ((outer == IOR || outer == XOR || outer == AND)
9049 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9050 return COSTS_N_INSNS (1);
9051 else if (outer == AND)
9053 int i;
9054 /* This duplicates the tests in the andsi3 expander. */
9055 for (i = 9; i <= 31; i++)
9056 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9057 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9058 return COSTS_N_INSNS (2);
9060 else if (outer == ASHIFT || outer == ASHIFTRT
9061 || outer == LSHIFTRT)
9062 return 0;
9063 return COSTS_N_INSNS (2);
9065 case CONST:
9066 case CONST_DOUBLE:
9067 case LABEL_REF:
9068 case SYMBOL_REF:
9069 return COSTS_N_INSNS (3);
9071 case UDIV:
9072 case UMOD:
9073 case DIV:
9074 case MOD:
9075 return 100;
9077 case TRUNCATE:
9078 return 99;
9080 case AND:
9081 case XOR:
9082 case IOR:
9083 return COSTS_N_INSNS (1);
9085 case MEM:
9086 return (COSTS_N_INSNS (1)
9087 + COSTS_N_INSNS (1)
9088 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9089 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9090 ? COSTS_N_INSNS (1) : 0));
9092 case IF_THEN_ELSE:
9093 /* XXX a guess. */
9094 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9095 return 14;
9096 return 2;
9098 case ZERO_EXTEND:
9099 /* XXX still guessing. */
9100 switch (GET_MODE (XEXP (x, 0)))
9102 case QImode:
9103 return (1 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9106 case HImode:
9107 return (4 + (mode == DImode ? 4 : 0)
9108 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9110 case SImode:
9111 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9113 default:
9114 return 99;
9117 default:
9118 return 99;
9122 /* RTX costs when optimizing for size. */
9123 static bool
9124 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9125 int *total)
9127 machine_mode mode = GET_MODE (x);
9128 if (TARGET_THUMB1)
9130 *total = thumb1_size_rtx_costs (x, code, outer_code);
9131 return true;
9134 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9135 switch (code)
9137 case MEM:
9138 /* A memory access costs 1 insn if the mode is small, or the address is
9139 a single register; otherwise it costs one insn per word.
9140 if (REG_P (XEXP (x, 0)))
9141 *total = COSTS_N_INSNS (1);
9142 else if (flag_pic
9143 && GET_CODE (XEXP (x, 0)) == PLUS
9144 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9145 /* This will be split into two instructions.
9146 See arm.md:calculate_pic_address. */
9147 *total = COSTS_N_INSNS (2);
9148 else
9149 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9150 return true;
9152 case DIV:
9153 case MOD:
9154 case UDIV:
9155 case UMOD:
9156 /* Needs a libcall, so it costs about this. */
9157 *total = COSTS_N_INSNS (2);
9158 return false;
9160 case ROTATE:
9161 if (mode == SImode && REG_P (XEXP (x, 1)))
9163 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9164 return true;
9166 /* Fall through */
9167 case ROTATERT:
9168 case ASHIFT:
9169 case LSHIFTRT:
9170 case ASHIFTRT:
9171 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9173 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9174 return true;
9176 else if (mode == SImode)
9178 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9179 /* Slightly disparage register shifts, but not by much. */
9180 if (!CONST_INT_P (XEXP (x, 1)))
9181 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9182 return true;
9185 /* Needs a libcall. */
9186 *total = COSTS_N_INSNS (2);
9187 return false;
9189 case MINUS:
9190 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9191 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 *total = COSTS_N_INSNS (1);
9194 return false;
9197 if (mode == SImode)
9199 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9200 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9202 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9203 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9204 || subcode1 == ROTATE || subcode1 == ROTATERT
9205 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9206 || subcode1 == ASHIFTRT)
9208 /* It's just the cost of the two operands. */
9209 *total = 0;
9210 return false;
9213 *total = COSTS_N_INSNS (1);
9214 return false;
9217 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9218 return false;
9220 case PLUS:
9221 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9222 && (mode == SFmode || !TARGET_VFP_SINGLE))
9224 *total = COSTS_N_INSNS (1);
9225 return false;
9228 /* A shift as a part of ADD costs nothing. */
9229 if (GET_CODE (XEXP (x, 0)) == MULT
9230 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9232 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9233 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9234 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9235 return true;
9238 /* Fall through */
9239 case AND: case XOR: case IOR:
9240 if (mode == SImode)
9242 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9244 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9245 || subcode == LSHIFTRT || subcode == ASHIFTRT
9246 || (code == AND && subcode == NOT))
9248 /* It's just the cost of the two operands. */
9249 *total = 0;
9250 return false;
9254 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9255 return false;
9257 case MULT:
9258 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9259 return false;
9261 case NEG:
9262 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9263 && (mode == SFmode || !TARGET_VFP_SINGLE))
9265 *total = COSTS_N_INSNS (1);
9266 return false;
9269 /* Fall through */
9270 case NOT:
9271 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9273 return false;
9275 case IF_THEN_ELSE:
9276 *total = 0;
9277 return false;
9279 case COMPARE:
9280 if (cc_register (XEXP (x, 0), VOIDmode))
9281 *total = 0;
9282 else
9283 *total = COSTS_N_INSNS (1);
9284 return false;
9286 case ABS:
9287 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9288 && (mode == SFmode || !TARGET_VFP_SINGLE))
9289 *total = COSTS_N_INSNS (1);
9290 else
9291 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9292 return false;
9294 case SIGN_EXTEND:
9295 case ZERO_EXTEND:
9296 return arm_rtx_costs_1 (x, outer_code, total, 0);
9298 case CONST_INT:
9299 if (const_ok_for_arm (INTVAL (x)))
9300 /* A multiplication by a constant requires another instruction
9301 to load the constant to a register. */
9302 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9303 ? 1 : 0);
9304 else if (const_ok_for_arm (~INTVAL (x)))
9305 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9306 else if (const_ok_for_arm (-INTVAL (x)))
9308 if (outer_code == COMPARE || outer_code == PLUS
9309 || outer_code == MINUS)
9310 *total = 0;
9311 else
9312 *total = COSTS_N_INSNS (1);
9314 else
9315 *total = COSTS_N_INSNS (2);
9316 return true;
9318 case CONST:
9319 case LABEL_REF:
9320 case SYMBOL_REF:
9321 *total = COSTS_N_INSNS (2);
9322 return true;
9324 case CONST_DOUBLE:
9325 *total = COSTS_N_INSNS (4);
9326 return true;
9328 case CONST_VECTOR:
9329 if (TARGET_NEON
9330 && TARGET_HARD_FLOAT
9331 && outer_code == SET
9332 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9333 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9334 *total = COSTS_N_INSNS (1);
9335 else
9336 *total = COSTS_N_INSNS (4);
9337 return true;
9339 case HIGH:
9340 case LO_SUM:
9341 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9342 cost of these slightly. */
9343 *total = COSTS_N_INSNS (1) + 1;
9344 return true;
9346 case SET:
9347 return false;
9349 default:
9350 if (mode != VOIDmode)
9351 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9352 else
9353 *total = COSTS_N_INSNS (4); /* Who knows? */
9354 return false;
9358 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9359 operand, then return the operand that is being shifted. If the shift
9360 is not by a constant, then set SHIFT_REG to point to the operand.
9361 Return NULL if OP is not a shifter operand. */
9362 static rtx
9363 shifter_op_p (rtx op, rtx *shift_reg)
9365 enum rtx_code code = GET_CODE (op);
9367 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9368 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9369 return XEXP (op, 0);
9370 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9371 return XEXP (op, 0);
9372 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9373 || code == ASHIFTRT)
9375 if (!CONST_INT_P (XEXP (op, 1)))
9376 *shift_reg = XEXP (op, 1);
9377 return XEXP (op, 0);
9380 return NULL;
9383 static bool
9384 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9386 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9387 gcc_assert (GET_CODE (x) == UNSPEC);
9389 switch (XINT (x, 1))
9391 case UNSPEC_UNALIGNED_LOAD:
9392 /* We can only do unaligned loads into the integer unit, and we can't
9393 use LDM or LDRD. */
9394 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9395 if (speed_p)
9396 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9397 + extra_cost->ldst.load_unaligned);
9399 #ifdef NOT_YET
9400 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9401 ADDR_SPACE_GENERIC, speed_p);
9402 #endif
9403 return true;
9405 case UNSPEC_UNALIGNED_STORE:
9406 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9407 if (speed_p)
9408 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9409 + extra_cost->ldst.store_unaligned);
9411 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9412 #ifdef NOT_YET
9413 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9414 ADDR_SPACE_GENERIC, speed_p);
9415 #endif
9416 return true;
9418 case UNSPEC_VRINTZ:
9419 case UNSPEC_VRINTP:
9420 case UNSPEC_VRINTM:
9421 case UNSPEC_VRINTR:
9422 case UNSPEC_VRINTX:
9423 case UNSPEC_VRINTA:
9424 *cost = COSTS_N_INSNS (1);
9425 if (speed_p)
9426 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9428 return true;
9429 default:
9430 *cost = COSTS_N_INSNS (2);
9431 break;
9433 return false;
9436 /* Cost of a libcall. We assume one insn per argument, an amount for the
9437 call (one insn for -Os) and then one for processing the result. */
9438 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
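/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size,
   i.e. two insns for the arguments plus the per-call overhead above.  */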
9440 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9441 do \
9443 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9444 if (shift_op != NULL \
9445 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9447 if (shift_reg) \
9449 if (speed_p) \
9450 *cost += extra_cost->alu.arith_shift_reg; \
9451 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9453 else if (speed_p) \
9454 *cost += extra_cost->alu.arith_shift; \
9456 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9457 + rtx_cost (XEXP (x, 1 - IDX), \
9458 OP, 1, speed_p)); \
9459 return true; \
9462 while (0);
9464 /* RTX costs. Make an estimate of the cost of executing the operation
9465 X, which is contained within an operation with code OUTER_CODE.
9466 SPEED_P indicates whether the cost desired is the performance cost,
9467 or the size cost. The estimate is stored in COST and the return
9468 value is TRUE if the cost calculation is final, or FALSE if the
9469 caller should recurse through the operands of X to add additional
9470 costs.
9472 We currently make no attempt to model the size savings of Thumb-2
9473 16-bit instructions. At the normal points in compilation where
9474 this code is called we have no measure of whether the condition
9475 flags are live or not, and thus no realistic way to determine what
9476 the size will eventually be. */
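/* As one illustration of the contract: for a register-to-register SImode
   addition the PLUS case below sets *COST to COSTS_N_INSNS (1) (plus the
   tuning's alu.arith when SPEED_P) and returns false, letting the caller
   recurse into the two operands and add their costs.  */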
9477 static bool
9478 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9479 const struct cpu_cost_table *extra_cost,
9480 int *cost, bool speed_p)
9482 machine_mode mode = GET_MODE (x);
9484 if (TARGET_THUMB1)
9486 if (speed_p)
9487 *cost = thumb1_rtx_costs (x, code, outer_code);
9488 else
9489 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9490 return true;
9493 switch (code)
9495 case SET:
9496 *cost = 0;
9497 /* SET RTXs don't have a mode so we get it from the destination. */
9498 mode = GET_MODE (SET_DEST (x));
9500 if (REG_P (SET_SRC (x))
9501 && REG_P (SET_DEST (x)))
9503 /* Assume that most copies can be done with a single insn,
9504 unless we don't have HW FP, in which case everything
9505 larger than word mode will require two insns. */
9506 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9507 && GET_MODE_SIZE (mode) > 4)
9508 || mode == DImode)
9509 ? 2 : 1);
9510 /* Conditional register moves can be encoded
9511 in 16 bits in Thumb mode. */
9512 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9513 *cost >>= 1;
9515 return true;
9518 if (CONST_INT_P (SET_SRC (x)))
9520 /* Handle CONST_INT here, since the value doesn't have a mode
9521 and we would otherwise be unable to work out the true cost. */
9522 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9523 outer_code = SET;
9524 /* Slightly lower the cost of setting a core reg to a constant.
9525 This helps break up chains and allows for better scheduling. */
9526 if (REG_P (SET_DEST (x))
9527 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9528 *cost -= 1;
9529 x = SET_SRC (x);
9530 /* Immediate moves with an immediate in the range [0, 255] can be
9531 encoded in 16 bits in Thumb mode. */
9532 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9533 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9534 *cost >>= 1;
9535 goto const_int_cost;
9538 return false;
9540 case MEM:
9541 /* A memory access costs one insn if the mode is small or the address is
9542 a single register; otherwise it costs one insn per word. */
9543 if (REG_P (XEXP (x, 0)))
9544 *cost = COSTS_N_INSNS (1);
9545 else if (flag_pic
9546 && GET_CODE (XEXP (x, 0)) == PLUS
9547 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9548 /* This will be split into two instructions.
9549 See arm.md:calculate_pic_address. */
9550 *cost = COSTS_N_INSNS (2);
9551 else
9552 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9554 /* For speed optimizations, add the costs of the address and
9555 accessing memory. */
9556 if (speed_p)
9557 #ifdef NOT_YET
9558 *cost += (extra_cost->ldst.load
9559 + arm_address_cost (XEXP (x, 0), mode,
9560 ADDR_SPACE_GENERIC, speed_p));
9561 #else
9562 *cost += extra_cost->ldst.load;
9563 #endif
9564 return true;
9566 case PARALLEL:
9568 /* Calculations of LDM costs are complex. We assume an initial cost
9569 (ldm_1st) which will load the number of registers mentioned in
9570 ldm_regs_per_insn_1st registers; then each additional
9571 ldm_regs_per_insn_subsequent registers cost one more insn. The
9572 formula for N regs is thus:
9574 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9575 + ldm_regs_per_insn_subsequent - 1)
9576 / ldm_regs_per_insn_subsequent).
9578 Additional costs may also be added for addressing. A similar
9579 formula is used for STM. */
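        /* Purely illustrative example with made-up tuning numbers: loading
           six registers with ldm_regs_per_insn_1st == 4 and
           ldm_regs_per_insn_subsequent == 2 costs
           ldm_1st + COSTS_N_INSNS ((MAX (6 - 4, 0) + 2 - 1) / 2)
           = ldm_1st + COSTS_N_INSNS (1).  */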
9581 bool is_ldm = load_multiple_operation (x, SImode);
9582 bool is_stm = store_multiple_operation (x, SImode);
9584 *cost = COSTS_N_INSNS (1);
9586 if (is_ldm || is_stm)
9588 if (speed_p)
9590 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9591 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9592 ? extra_cost->ldst.ldm_regs_per_insn_1st
9593 : extra_cost->ldst.stm_regs_per_insn_1st;
9594 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9595 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9596 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9598 *cost += regs_per_insn_1st
9599 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9600 + regs_per_insn_sub - 1)
9601 / regs_per_insn_sub);
9602 return true;
9606 return false;
9608 case DIV:
9609 case UDIV:
9610 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9611 && (mode == SFmode || !TARGET_VFP_SINGLE))
9612 *cost = COSTS_N_INSNS (speed_p
9613 ? extra_cost->fp[mode != SFmode].div : 1);
9614 else if (mode == SImode && TARGET_IDIV)
9615 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9616 else
9617 *cost = LIBCALL_COST (2);
9618 return false; /* All arguments must be in registers. */
9620 case MOD:
9621 case UMOD:
9622 *cost = LIBCALL_COST (2);
9623 return false; /* All arguments must be in registers. */
9625 case ROTATE:
9626 if (mode == SImode && REG_P (XEXP (x, 1)))
9628 *cost = (COSTS_N_INSNS (2)
9629 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9630 if (speed_p)
9631 *cost += extra_cost->alu.shift_reg;
9632 return true;
9634 /* Fall through. */
9635 case ROTATERT:
9636 case ASHIFT:
9637 case LSHIFTRT:
9638 case ASHIFTRT:
9639 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9641 *cost = (COSTS_N_INSNS (3)
9642 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9643 if (speed_p)
9644 *cost += 2 * extra_cost->alu.shift;
9645 return true;
9647 else if (mode == SImode)
9649 *cost = (COSTS_N_INSNS (1)
9650 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9651 /* Slightly disparage register shifts at -Os, but not by much. */
9652 if (!CONST_INT_P (XEXP (x, 1)))
9653 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9654 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9655 return true;
9657 else if (GET_MODE_CLASS (mode) == MODE_INT
9658 && GET_MODE_SIZE (mode) < 4)
9660 if (code == ASHIFT)
9662 *cost = (COSTS_N_INSNS (1)
9663 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9664 /* Slightly disparage register shifts at -Os, but not by
9665 much. */
9666 if (!CONST_INT_P (XEXP (x, 1)))
9667 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9668 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9670 else if (code == LSHIFTRT || code == ASHIFTRT)
9672 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9674 /* Can use SBFX/UBFX. */
9675 *cost = COSTS_N_INSNS (1);
9676 if (speed_p)
9677 *cost += extra_cost->alu.bfx;
9678 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9680 else
9682 *cost = COSTS_N_INSNS (2);
9683 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9684 if (speed_p)
9686 if (CONST_INT_P (XEXP (x, 1)))
9687 *cost += 2 * extra_cost->alu.shift;
9688 else
9689 *cost += (extra_cost->alu.shift
9690 + extra_cost->alu.shift_reg);
9692 else
9693 /* Slightly disparage register shifts. */
9694 *cost += !CONST_INT_P (XEXP (x, 1));
9697 else /* Rotates. */
9699 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9700 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9701 if (speed_p)
9703 if (CONST_INT_P (XEXP (x, 1)))
9704 *cost += (2 * extra_cost->alu.shift
9705 + extra_cost->alu.log_shift);
9706 else
9707 *cost += (extra_cost->alu.shift
9708 + extra_cost->alu.shift_reg
9709 + extra_cost->alu.log_shift_reg);
9712 return true;
9715 *cost = LIBCALL_COST (2);
9716 return false;
9718 case BSWAP:
9719 if (arm_arch6)
9721 if (mode == SImode)
9723 *cost = COSTS_N_INSNS (1);
9724 if (speed_p)
9725 *cost += extra_cost->alu.rev;
9727 return false;
9730 else
9732 /* No rev instruction available. Look at arm_legacy_rev
9733 and thumb_legacy_rev for the form of RTL used then. */
9734 if (TARGET_THUMB)
9736 *cost = COSTS_N_INSNS (10);
9738 if (speed_p)
9740 *cost += 6 * extra_cost->alu.shift;
9741 *cost += 3 * extra_cost->alu.logical;
9744 else
9746 *cost = COSTS_N_INSNS (5);
9748 if (speed_p)
9750 *cost += 2 * extra_cost->alu.shift;
9751 *cost += extra_cost->alu.arith_shift;
9752 *cost += 2 * extra_cost->alu.logical;
9755 return true;
9757 return false;
9759 case MINUS:
9760 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9761 && (mode == SFmode || !TARGET_VFP_SINGLE))
9763 *cost = COSTS_N_INSNS (1);
9764 if (GET_CODE (XEXP (x, 0)) == MULT
9765 || GET_CODE (XEXP (x, 1)) == MULT)
9767 rtx mul_op0, mul_op1, sub_op;
9769 if (speed_p)
9770 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9772 if (GET_CODE (XEXP (x, 0)) == MULT)
9774 mul_op0 = XEXP (XEXP (x, 0), 0);
9775 mul_op1 = XEXP (XEXP (x, 0), 1);
9776 sub_op = XEXP (x, 1);
9778 else
9780 mul_op0 = XEXP (XEXP (x, 1), 0);
9781 mul_op1 = XEXP (XEXP (x, 1), 1);
9782 sub_op = XEXP (x, 0);
9785 /* The first operand of the multiply may be optionally
9786 negated. */
9787 if (GET_CODE (mul_op0) == NEG)
9788 mul_op0 = XEXP (mul_op0, 0);
9790 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9791 + rtx_cost (mul_op1, code, 0, speed_p)
9792 + rtx_cost (sub_op, code, 0, speed_p));
9794 return true;
9797 if (speed_p)
9798 *cost += extra_cost->fp[mode != SFmode].addsub;
9799 return false;
9802 if (mode == SImode)
9804 rtx shift_by_reg = NULL;
9805 rtx shift_op;
9806 rtx non_shift_op;
9808 *cost = COSTS_N_INSNS (1);
9810 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9811 if (shift_op == NULL)
9813 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9814 non_shift_op = XEXP (x, 0);
9816 else
9817 non_shift_op = XEXP (x, 1);
9819 if (shift_op != NULL)
9821 if (shift_by_reg != NULL)
9823 if (speed_p)
9824 *cost += extra_cost->alu.arith_shift_reg;
9825 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9827 else if (speed_p)
9828 *cost += extra_cost->alu.arith_shift;
9830 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9831 + rtx_cost (non_shift_op, code, 0, speed_p));
9832 return true;
9835 if (arm_arch_thumb2
9836 && GET_CODE (XEXP (x, 1)) == MULT)
9838 /* MLS. */
9839 if (speed_p)
9840 *cost += extra_cost->mult[0].add;
9841 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9842 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9843 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9844 return true;
9847 if (CONST_INT_P (XEXP (x, 0)))
9849 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9850 INTVAL (XEXP (x, 0)), NULL_RTX,
9851 NULL_RTX, 1, 0);
9852 *cost = COSTS_N_INSNS (insns);
9853 if (speed_p)
9854 *cost += insns * extra_cost->alu.arith;
9855 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9856 return true;
9858 else if (speed_p)
9859 *cost += extra_cost->alu.arith;
9861 return false;
9864 if (GET_MODE_CLASS (mode) == MODE_INT
9865 && GET_MODE_SIZE (mode) < 4)
9867 rtx shift_op, shift_reg;
9868 shift_reg = NULL;
9870 /* We check both sides of the MINUS for shifter operands since,
9871 unlike PLUS, it's not commutative. */
9873 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9874 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9876 /* Slightly disparage, as we might need to widen the result. */
9877 *cost = 1 + COSTS_N_INSNS (1);
9878 if (speed_p)
9879 *cost += extra_cost->alu.arith;
9881 if (CONST_INT_P (XEXP (x, 0)))
9883 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9884 return true;
9887 return false;
9890 if (mode == DImode)
9892 *cost = COSTS_N_INSNS (2);
9894 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9896 rtx op1 = XEXP (x, 1);
9898 if (speed_p)
9899 *cost += 2 * extra_cost->alu.arith;
9901 if (GET_CODE (op1) == ZERO_EXTEND)
9902 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9903 else
9904 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9905 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9906 0, speed_p);
9907 return true;
9909 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9911 if (speed_p)
9912 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9913 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9914 0, speed_p)
9915 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9916 return true;
9918 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9921 if (speed_p)
9922 *cost += (extra_cost->alu.arith
9923 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9924 ? extra_cost->alu.arith
9925 : extra_cost->alu.arith_shift));
9926 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9927 + rtx_cost (XEXP (XEXP (x, 1), 0),
9928 GET_CODE (XEXP (x, 1)), 0, speed_p));
9929 return true;
9932 if (speed_p)
9933 *cost += 2 * extra_cost->alu.arith;
9934 return false;
9937 /* Vector mode? */
9939 *cost = LIBCALL_COST (2);
9940 return false;
9942 case PLUS:
9943 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9944 && (mode == SFmode || !TARGET_VFP_SINGLE))
9946 *cost = COSTS_N_INSNS (1);
9947 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 rtx mul_op0, mul_op1, add_op;
9951 if (speed_p)
9952 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9954 mul_op0 = XEXP (XEXP (x, 0), 0);
9955 mul_op1 = XEXP (XEXP (x, 0), 1);
9956 add_op = XEXP (x, 1);
9958 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9959 + rtx_cost (mul_op1, code, 0, speed_p)
9960 + rtx_cost (add_op, code, 0, speed_p));
9962 return true;
9965 if (speed_p)
9966 *cost += extra_cost->fp[mode != SFmode].addsub;
9967 return false;
9969 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9971 *cost = LIBCALL_COST (2);
9972 return false;
9975 /* Narrow modes can be synthesized in SImode, but the range
9976 of useful sub-operations is limited. Check for shift operations
9977 on one of the operands. Only left shifts can be used in the
9978 narrow modes. */
9979 if (GET_MODE_CLASS (mode) == MODE_INT
9980 && GET_MODE_SIZE (mode) < 4)
9982 rtx shift_op, shift_reg;
9983 shift_reg = NULL;
9985 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9987 if (CONST_INT_P (XEXP (x, 1)))
9989 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9990 INTVAL (XEXP (x, 1)), NULL_RTX,
9991 NULL_RTX, 1, 0);
9992 *cost = COSTS_N_INSNS (insns);
9993 if (speed_p)
9994 *cost += insns * extra_cost->alu.arith;
9995 /* Slightly penalize a narrow operation as the result may
9996 need widening. */
9997 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9998 return true;
10001 /* Slightly penalize a narrow operation as the result may
10002 need widening. */
10003 *cost = 1 + COSTS_N_INSNS (1);
10004 if (speed_p)
10005 *cost += extra_cost->alu.arith;
10007 return false;
10010 if (mode == SImode)
10012 rtx shift_op, shift_reg;
10014 *cost = COSTS_N_INSNS (1);
10015 if (TARGET_INT_SIMD
10016 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10017 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10019 /* UXTA[BH] or SXTA[BH]. */
10020 if (speed_p)
10021 *cost += extra_cost->alu.extend_arith;
10022 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10023 speed_p)
10024 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
10025 return true;
10028 shift_reg = NULL;
10029 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10030 if (shift_op != NULL)
10032 if (shift_reg)
10034 if (speed_p)
10035 *cost += extra_cost->alu.arith_shift_reg;
10036 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10038 else if (speed_p)
10039 *cost += extra_cost->alu.arith_shift;
10041 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10042 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10043 return true;
10045 if (GET_CODE (XEXP (x, 0)) == MULT)
10047 rtx mul_op = XEXP (x, 0);
10049 *cost = COSTS_N_INSNS (1);
10051 if (TARGET_DSP_MULTIPLY
10052 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10053 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10054 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10055 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10056 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10057 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10058 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10059 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10060 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10061 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10062 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10063 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10064 == 16))))))
10066 /* SMLA[BT][BT]. */
10067 if (speed_p)
10068 *cost += extra_cost->mult[0].extend_add;
10069 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
10070 SIGN_EXTEND, 0, speed_p)
10071 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
10072 SIGN_EXTEND, 0, speed_p)
10073 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10074 return true;
10077 if (speed_p)
10078 *cost += extra_cost->mult[0].add;
10079 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
10080 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
10081 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10082 return true;
10084 if (CONST_INT_P (XEXP (x, 1)))
10086 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10087 INTVAL (XEXP (x, 1)), NULL_RTX,
10088 NULL_RTX, 1, 0);
10089 *cost = COSTS_N_INSNS (insns);
10090 if (speed_p)
10091 *cost += insns * extra_cost->alu.arith;
10092 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10093 return true;
10095 else if (speed_p)
10096 *cost += extra_cost->alu.arith;
10098 return false;
10101 if (mode == DImode)
10103 if (arm_arch3m
10104 && GET_CODE (XEXP (x, 0)) == MULT
10105 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10106 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10107 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10108 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10110 *cost = COSTS_N_INSNS (1);
10111 if (speed_p)
10112 *cost += extra_cost->mult[1].extend_add;
10113 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10114 ZERO_EXTEND, 0, speed_p)
10115 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10116 ZERO_EXTEND, 0, speed_p)
10117 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10118 return true;
10121 *cost = COSTS_N_INSNS (2);
10123 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10124 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10126 if (speed_p)
10127 *cost += (extra_cost->alu.arith
10128 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10129 ? extra_cost->alu.arith
10130 : extra_cost->alu.arith_shift));
10132 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10133 speed_p)
10134 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10135 return true;
10138 if (speed_p)
10139 *cost += 2 * extra_cost->alu.arith;
10140 return false;
10143 /* Vector mode? */
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146 case IOR:
10147 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10149 *cost = COSTS_N_INSNS (1);
10150 if (speed_p)
10151 *cost += extra_cost->alu.rev;
10153 return true;
10155 /* Fall through. */
10156 case AND: case XOR:
10157 if (mode == SImode)
10159 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10160 rtx op0 = XEXP (x, 0);
10161 rtx shift_op, shift_reg;
10163 *cost = COSTS_N_INSNS (1);
10165 if (subcode == NOT
10166 && (code == AND
10167 || (code == IOR && TARGET_THUMB2)))
10168 op0 = XEXP (op0, 0);
10170 shift_reg = NULL;
10171 shift_op = shifter_op_p (op0, &shift_reg);
10172 if (shift_op != NULL)
10174 if (shift_reg)
10176 if (speed_p)
10177 *cost += extra_cost->alu.log_shift_reg;
10178 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10180 else if (speed_p)
10181 *cost += extra_cost->alu.log_shift;
10183 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10184 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10185 return true;
10188 if (CONST_INT_P (XEXP (x, 1)))
10190 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10191 INTVAL (XEXP (x, 1)), NULL_RTX,
10192 NULL_RTX, 1, 0);
10194 *cost = COSTS_N_INSNS (insns);
10195 if (speed_p)
10196 *cost += insns * extra_cost->alu.logical;
10197 *cost += rtx_cost (op0, code, 0, speed_p);
10198 return true;
10201 if (speed_p)
10202 *cost += extra_cost->alu.logical;
10203 *cost += (rtx_cost (op0, code, 0, speed_p)
10204 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10205 return true;
10208 if (mode == DImode)
10210 rtx op0 = XEXP (x, 0);
10211 enum rtx_code subcode = GET_CODE (op0);
10213 *cost = COSTS_N_INSNS (2);
10215 if (subcode == NOT
10216 && (code == AND
10217 || (code == IOR && TARGET_THUMB2)))
10218 op0 = XEXP (op0, 0);
10220 if (GET_CODE (op0) == ZERO_EXTEND)
10222 if (speed_p)
10223 *cost += 2 * extra_cost->alu.logical;
10225 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10226 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10227 return true;
10229 else if (GET_CODE (op0) == SIGN_EXTEND)
10231 if (speed_p)
10232 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10234 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10235 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10236 return true;
10239 if (speed_p)
10240 *cost += 2 * extra_cost->alu.logical;
10242 return true;
10244 /* Vector mode? */
10246 *cost = LIBCALL_COST (2);
10247 return false;
10249 case MULT:
10250 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10251 && (mode == SFmode || !TARGET_VFP_SINGLE))
10253 rtx op0 = XEXP (x, 0);
10255 *cost = COSTS_N_INSNS (1);
10257 if (GET_CODE (op0) == NEG)
10258 op0 = XEXP (op0, 0);
10260 if (speed_p)
10261 *cost += extra_cost->fp[mode != SFmode].mult;
10263 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10264 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10265 return true;
10267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10269 *cost = LIBCALL_COST (2);
10270 return false;
10273 if (mode == SImode)
10275 *cost = COSTS_N_INSNS (1);
10276 if (TARGET_DSP_MULTIPLY
10277 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10278 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10279 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10280 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10281 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10282 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10283 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10284 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10285 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10286 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10287 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10288 && (INTVAL (XEXP (XEXP (x, 1), 1))
10289 == 16))))))
10291 /* SMUL[TB][TB]. */
10292 if (speed_p)
10293 *cost += extra_cost->mult[0].extend;
10294 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10295 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10296 return true;
10298 if (speed_p)
10299 *cost += extra_cost->mult[0].simple;
10300 return false;
10303 if (mode == DImode)
10305 if (arm_arch3m
10306 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10307 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10308 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10309 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10311 *cost = COSTS_N_INSNS (1);
10312 if (speed_p)
10313 *cost += extra_cost->mult[1].extend;
10314 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10315 ZERO_EXTEND, 0, speed_p)
10316 + rtx_cost (XEXP (XEXP (x, 1), 0),
10317 ZERO_EXTEND, 0, speed_p));
10318 return true;
10321 *cost = LIBCALL_COST (2);
10322 return false;
10325 /* Vector mode? */
10326 *cost = LIBCALL_COST (2);
10327 return false;
10329 case NEG:
10330 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10331 && (mode == SFmode || !TARGET_VFP_SINGLE))
10333 *cost = COSTS_N_INSNS (1);
10334 if (speed_p)
10335 *cost += extra_cost->fp[mode != SFmode].neg;
10337 return false;
10339 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10341 *cost = LIBCALL_COST (1);
10342 return false;
10345 if (mode == SImode)
10347 if (GET_CODE (XEXP (x, 0)) == ABS)
10349 *cost = COSTS_N_INSNS (2);
10350 /* Assume the non-flag-changing variant. */
10351 if (speed_p)
10352 *cost += (extra_cost->alu.log_shift
10353 + extra_cost->alu.arith_shift);
10354 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10355 return true;
10358 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10359 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10361 *cost = COSTS_N_INSNS (2);
10362 /* No extra cost for MOV imm and MVN imm. */
10363 /* If the comparison op is using the flags, there's no further
10364 cost, otherwise we need to add the cost of the comparison. */
10365 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10366 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10367 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10369 *cost += (COSTS_N_INSNS (1)
10370 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10371 speed_p)
10372 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10373 speed_p));
10374 if (speed_p)
10375 *cost += extra_cost->alu.arith;
10377 return true;
10379 *cost = COSTS_N_INSNS (1);
10380 if (speed_p)
10381 *cost += extra_cost->alu.arith;
10382 return false;
10385 if (GET_MODE_CLASS (mode) == MODE_INT
10386 && GET_MODE_SIZE (mode) < 4)
10388 /* Slightly disparage, as we might need an extend operation. */
10389 *cost = 1 + COSTS_N_INSNS (1);
10390 if (speed_p)
10391 *cost += extra_cost->alu.arith;
10392 return false;
10395 if (mode == DImode)
10397 *cost = COSTS_N_INSNS (2);
10398 if (speed_p)
10399 *cost += 2 * extra_cost->alu.arith;
10400 return false;
10403 /* Vector mode? */
10404 *cost = LIBCALL_COST (1);
10405 return false;
10407 case NOT:
10408 if (mode == SImode)
10410 rtx shift_op;
10411 rtx shift_reg = NULL;
10413 *cost = COSTS_N_INSNS (1);
10414 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10416 if (shift_op)
10418 if (shift_reg != NULL)
10420 if (speed_p)
10421 *cost += extra_cost->alu.log_shift_reg;
10422 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10424 else if (speed_p)
10425 *cost += extra_cost->alu.log_shift;
10426 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10427 return true;
10430 if (speed_p)
10431 *cost += extra_cost->alu.logical;
10432 return false;
10434 if (mode == DImode)
10436 *cost = COSTS_N_INSNS (2);
10437 return false;
10440 /* Vector mode? */
10442 *cost += LIBCALL_COST (1);
10443 return false;
10445 case IF_THEN_ELSE:
10447 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10449 *cost = COSTS_N_INSNS (4);
10450 return true;
10452 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10453 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10455 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10456 /* Assume that if one arm of the if_then_else is a register,
10457 it will be tied with the result, eliminating the
10458 conditional insn. */
10459 if (REG_P (XEXP (x, 1)))
10460 *cost += op2cost;
10461 else if (REG_P (XEXP (x, 2)))
10462 *cost += op1cost;
10463 else
10465 if (speed_p)
10467 if (extra_cost->alu.non_exec_costs_exec)
10468 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10469 else
10470 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10472 else
10473 *cost += op1cost + op2cost;
10476 return true;
10478 case COMPARE:
10479 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10480 *cost = 0;
10481 else
10483 machine_mode op0mode;
10484 /* We'll mostly assume that the cost of a compare is the cost of the
10485 LHS. However, there are some notable exceptions. */
10487 /* Floating point compares are never done as side-effects. */
10488 op0mode = GET_MODE (XEXP (x, 0));
10489 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10490 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10492 *cost = COSTS_N_INSNS (1);
10493 if (speed_p)
10494 *cost += extra_cost->fp[op0mode != SFmode].compare;
10496 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10498 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10499 return true;
10502 return false;
10504 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10506 *cost = LIBCALL_COST (2);
10507 return false;
10510 /* DImode compares normally take two insns. */
10511 if (op0mode == DImode)
10513 *cost = COSTS_N_INSNS (2);
10514 if (speed_p)
10515 *cost += 2 * extra_cost->alu.arith;
10516 return false;
10519 if (op0mode == SImode)
10521 rtx shift_op;
10522 rtx shift_reg;
10524 if (XEXP (x, 1) == const0_rtx
10525 && !(REG_P (XEXP (x, 0))
10526 || (GET_CODE (XEXP (x, 0)) == SUBREG
10527 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10529 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10531 /* Multiply operations that set the flags are often
10532 significantly more expensive. */
10533 if (speed_p
10534 && GET_CODE (XEXP (x, 0)) == MULT
10535 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10536 *cost += extra_cost->mult[0].flag_setting;
10538 if (speed_p
10539 && GET_CODE (XEXP (x, 0)) == PLUS
10540 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10541 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10542 0), 1), mode))
10543 *cost += extra_cost->mult[0].flag_setting;
10544 return true;
10547 shift_reg = NULL;
10548 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10549 if (shift_op != NULL)
10551 *cost = COSTS_N_INSNS (1);
10552 if (shift_reg != NULL)
10554 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10555 if (speed_p)
10556 *cost += extra_cost->alu.arith_shift_reg;
10558 else if (speed_p)
10559 *cost += extra_cost->alu.arith_shift;
10560 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10561 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10562 return true;
10565 *cost = COSTS_N_INSNS (1);
10566 if (speed_p)
10567 *cost += extra_cost->alu.arith;
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10571 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10572 return true;
10574 return false;
10577 /* Vector mode? */
10579 *cost = LIBCALL_COST (2);
10580 return false;
10582 return true;
10584 case EQ:
10585 case NE:
10586 case LT:
10587 case LE:
10588 case GT:
10589 case GE:
10590 case LTU:
10591 case LEU:
10592 case GEU:
10593 case GTU:
10594 case ORDERED:
10595 case UNORDERED:
10596 case UNEQ:
10597 case UNLE:
10598 case UNLT:
10599 case UNGE:
10600 case UNGT:
10601 case LTGT:
10602 if (outer_code == SET)
10604 /* Is it a store-flag operation? */
10605 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10606 && XEXP (x, 1) == const0_rtx)
10608 /* Thumb also needs an IT insn. */
10609 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10610 return true;
10612 if (XEXP (x, 1) == const0_rtx)
10614 switch (code)
10616 case LT:
10617 /* LSR Rd, Rn, #31. */
10618 *cost = COSTS_N_INSNS (1);
10619 if (speed_p)
10620 *cost += extra_cost->alu.shift;
10621 break;
10623 case EQ:
10624 /* RSBS T1, Rn, #0
10625 ADC Rd, Rn, T1. */
10627 case NE:
10628 /* SUBS T1, Rn, #1
10629 SBC Rd, Rn, T1. */
10630 *cost = COSTS_N_INSNS (2);
10631 break;
10633 case LE:
10634 /* RSBS T1, Rn, Rn, LSR #31
10635 ADC Rd, Rn, T1. */
10636 *cost = COSTS_N_INSNS (2);
10637 if (speed_p)
10638 *cost += extra_cost->alu.arith_shift;
10639 break;
10641 case GT:
10642 /* RSB Rd, Rn, Rn, ASR #1
10643 LSR Rd, Rd, #31. */
10644 *cost = COSTS_N_INSNS (2);
10645 if (speed_p)
10646 *cost += (extra_cost->alu.arith_shift
10647 + extra_cost->alu.shift);
10648 break;
10650 case GE:
10651 /* ASR Rd, Rn, #31
10652 ADD Rd, Rn, #1. */
10653 *cost = COSTS_N_INSNS (2);
10654 if (speed_p)
10655 *cost += extra_cost->alu.shift;
10656 break;
10658 default:
10659 /* Remaining cases are either meaningless or would take
10660 three insns anyway. */
10661 *cost = COSTS_N_INSNS (3);
10662 break;
10664 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10665 return true;
10667 else
10669 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10670 if (CONST_INT_P (XEXP (x, 1))
10671 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10673 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10674 return true;
10677 return false;
10680 /* Not directly inside a set. If it involves the condition code
10681 register it must be the condition for a branch, cond_exec or
10682 I_T_E operation. Since the comparison is performed elsewhere
10683 this is just the control part which has no additional
10684 cost. */
10685 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10686 && XEXP (x, 1) == const0_rtx)
10688 *cost = 0;
10689 return true;
10691 return false;
10693 case ABS:
10694 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10695 && (mode == SFmode || !TARGET_VFP_SINGLE))
10697 *cost = COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->fp[mode != SFmode].neg;
10701 return false;
10703 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10705 *cost = LIBCALL_COST (1);
10706 return false;
10709 if (mode == SImode)
10711 *cost = COSTS_N_INSNS (1);
10712 if (speed_p)
10713 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10714 return false;
10716 /* Vector mode? */
10717 *cost = LIBCALL_COST (1);
10718 return false;
10720 case SIGN_EXTEND:
10721 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10722 && MEM_P (XEXP (x, 0)))
10724 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10726 if (mode == DImode)
10727 *cost += COSTS_N_INSNS (1);
10729 if (!speed_p)
10730 return true;
10732 if (GET_MODE (XEXP (x, 0)) == SImode)
10733 *cost += extra_cost->ldst.load;
10734 else
10735 *cost += extra_cost->ldst.load_sign_extend;
10737 if (mode == DImode)
10738 *cost += extra_cost->alu.shift;
10740 return true;
10743 /* Widening from less than 32 bits requires an extend operation. */
10744 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10746 /* We have SXTB/SXTH. */
10747 *cost = COSTS_N_INSNS (1);
10748 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10749 if (speed_p)
10750 *cost += extra_cost->alu.extend;
10752 else if (GET_MODE (XEXP (x, 0)) != SImode)
10754 /* Needs two shifts. */
10755 *cost = COSTS_N_INSNS (2);
10756 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10757 if (speed_p)
10758 *cost += 2 * extra_cost->alu.shift;
10761 /* Widening beyond 32 bits requires one more insn. */
10762 if (mode == DImode)
10764 *cost += COSTS_N_INSNS (1);
10765 if (speed_p)
10766 *cost += extra_cost->alu.shift;
10769 return true;
10771 case ZERO_EXTEND:
10772 if ((arm_arch4
10773 || GET_MODE (XEXP (x, 0)) == SImode
10774 || GET_MODE (XEXP (x, 0)) == QImode)
10775 && MEM_P (XEXP (x, 0)))
10777 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10779 if (mode == DImode)
10780 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10782 return true;
10785 /* Widening from less than 32 bits requires an extend operation. */
10786 if (GET_MODE (XEXP (x, 0)) == QImode)
10788 /* UXTB can be a shorter instruction in Thumb2, but it might
10789 be slower than the AND Rd, Rn, #255 alternative. When
10790 optimizing for speed it should never be slower to use
10791 AND, and we don't really model 16-bit vs 32-bit insns
10792 here. */
10793 *cost = COSTS_N_INSNS (1);
10794 if (speed_p)
10795 *cost += extra_cost->alu.logical;
10797 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10799 /* We have UXTB/UXTH. */
10800 *cost = COSTS_N_INSNS (1);
10801 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10802 if (speed_p)
10803 *cost += extra_cost->alu.extend;
10805 else if (GET_MODE (XEXP (x, 0)) != SImode)
10807 /* Needs two shifts. It's marginally preferable to use
10808 shifts rather than two BIC instructions as the second
10809 shift may merge with a subsequent insn as a shifter
10810 op. */
10811 *cost = COSTS_N_INSNS (2);
10812 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10813 if (speed_p)
10814 *cost += 2 * extra_cost->alu.shift;
10816 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10817 *cost = COSTS_N_INSNS (1);
10819 /* Widening beyond 32 bits requires one more insn. */
10820 if (mode == DImode)
10822 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10825 return true;
10827 case CONST_INT:
10828 *cost = 0;
10829 /* CONST_INT has no mode, so we cannot tell for sure how many
10830 insns are really going to be needed. The best we can do is
10831 look at the value passed. If it fits in SImode, then assume
10832 that's the mode it will be used for. Otherwise assume it
10833 will be used in DImode. */
10834 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10835 mode = SImode;
10836 else
10837 mode = DImode;
10839 /* Avoid blowing up in arm_gen_constant (). */
10840 if (!(outer_code == PLUS
10841 || outer_code == AND
10842 || outer_code == IOR
10843 || outer_code == XOR
10844 || outer_code == MINUS))
10845 outer_code = SET;
10847 const_int_cost:
10848 if (mode == SImode)
10850 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10851 INTVAL (x), NULL, NULL,
10852 0, 0));
10853 /* Extra costs? */
10855 else
10857 *cost += COSTS_N_INSNS (arm_gen_constant
10858 (outer_code, SImode, NULL,
10859 trunc_int_for_mode (INTVAL (x), SImode),
10860 NULL, NULL, 0, 0)
10861 + arm_gen_constant (outer_code, SImode, NULL,
10862 INTVAL (x) >> 32, NULL,
10863 NULL, 0, 0));
10864 /* Extra costs? */
10867 return true;
10869 case CONST:
10870 case LABEL_REF:
10871 case SYMBOL_REF:
10872 if (speed_p)
10874 if (arm_arch_thumb2 && !flag_pic)
10875 *cost = COSTS_N_INSNS (2);
10876 else
10877 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10879 else
10880 *cost = COSTS_N_INSNS (2);
10882 if (flag_pic)
10884 *cost += COSTS_N_INSNS (1);
10885 if (speed_p)
10886 *cost += extra_cost->alu.arith;
10889 return true;
10891 case CONST_FIXED:
10892 *cost = COSTS_N_INSNS (4);
10893 /* Fixme. */
10894 return true;
10896 case CONST_DOUBLE:
10897 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10898 && (mode == SFmode || !TARGET_VFP_SINGLE))
10900 if (vfp3_const_double_rtx (x))
10902 *cost = COSTS_N_INSNS (1);
10903 if (speed_p)
10904 *cost += extra_cost->fp[mode == DFmode].fpconst;
10905 return true;
10908 if (speed_p)
10910 *cost = COSTS_N_INSNS (1);
10911 if (mode == DFmode)
10912 *cost += extra_cost->ldst.loadd;
10913 else
10914 *cost += extra_cost->ldst.loadf;
10916 else
10917 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10919 return true;
10921 *cost = COSTS_N_INSNS (4);
10922 return true;
10924 case CONST_VECTOR:
10925 /* Fixme. */
10926 if (TARGET_NEON
10927 && TARGET_HARD_FLOAT
10928 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10929 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10930 *cost = COSTS_N_INSNS (1);
10931 else
10932 *cost = COSTS_N_INSNS (4);
10933 return true;
10935 case HIGH:
10936 case LO_SUM:
10937 *cost = COSTS_N_INSNS (1);
10938 /* When optimizing for size, we prefer constant pool entries to
10939 MOVW/MOVT pairs, so bump the cost of these slightly. */
10940 if (!speed_p)
10941 *cost += 1;
10942 return true;
10944 case CLZ:
10945 *cost = COSTS_N_INSNS (1);
10946 if (speed_p)
10947 *cost += extra_cost->alu.clz;
10948 return false;
10950 case SMIN:
10951 if (XEXP (x, 1) == const0_rtx)
10953 *cost = COSTS_N_INSNS (1);
10954 if (speed_p)
10955 *cost += extra_cost->alu.log_shift;
10956 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10957 return true;
10959 /* Fall through. */
10960 case SMAX:
10961 case UMIN:
10962 case UMAX:
10963 *cost = COSTS_N_INSNS (2);
10964 return false;
10966 case TRUNCATE:
10967 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10968 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10969 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10970 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10971 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10972 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10973 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10974 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10975 == ZERO_EXTEND))))
10977 *cost = COSTS_N_INSNS (1);
10978 if (speed_p)
10979 *cost += extra_cost->mult[1].extend;
10980 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10981 speed_p)
10982 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10983 0, speed_p));
10984 return true;
10986 *cost = LIBCALL_COST (1);
10987 return false;
10989 case UNSPEC:
10990 return arm_unspec_cost (x, outer_code, speed_p, cost);
10992 case PC:
10993 /* Reading the PC is like reading any other register. Writing it
10994 is more expensive, but we take that into account elsewhere. */
10995 *cost = 0;
10996 return true;
10998 case ZERO_EXTRACT:
10999 /* TODO: Simple zero_extract of bottom bits using AND. */
11000 /* Fall through. */
11001 case SIGN_EXTRACT:
11002 if (arm_arch6
11003 && mode == SImode
11004 && CONST_INT_P (XEXP (x, 1))
11005 && CONST_INT_P (XEXP (x, 2)))
11007 *cost = COSTS_N_INSNS (1);
11008 if (speed_p)
11009 *cost += extra_cost->alu.bfx;
11010 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11011 return true;
11013 /* Without UBFX/SBFX, need to resort to shift operations. */
11014 *cost = COSTS_N_INSNS (2);
11015 if (speed_p)
11016 *cost += 2 * extra_cost->alu.shift;
11017 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
11018 return true;
11020 case FLOAT_EXTEND:
11021 if (TARGET_HARD_FLOAT)
11023 *cost = COSTS_N_INSNS (1);
11024 if (speed_p)
11025 *cost += extra_cost->fp[mode == DFmode].widen;
11026 if (!TARGET_FPU_ARMV8
11027 && GET_MODE (XEXP (x, 0)) == HFmode)
11029 /* Pre v8, widening HF->DF is a two-step process, first
11030 widening to SFmode. */
11031 *cost += COSTS_N_INSNS (1);
11032 if (speed_p)
11033 *cost += extra_cost->fp[0].widen;
11035 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11036 return true;
11039 *cost = LIBCALL_COST (1);
11040 return false;
11042 case FLOAT_TRUNCATE:
11043 if (TARGET_HARD_FLOAT)
11045 *cost = COSTS_N_INSNS (1);
11046 if (speed_p)
11047 *cost += extra_cost->fp[mode == DFmode].narrow;
11048 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11049 return true;
11050 /* Vector modes? */
11052 *cost = LIBCALL_COST (1);
11053 return false;
11055 case FMA:
11056 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11058 rtx op0 = XEXP (x, 0);
11059 rtx op1 = XEXP (x, 1);
11060 rtx op2 = XEXP (x, 2);
11062 *cost = COSTS_N_INSNS (1);
11064 /* vfms or vfnma. */
11065 if (GET_CODE (op0) == NEG)
11066 op0 = XEXP (op0, 0);
11068 /* vfnms or vfnma. */
11069 if (GET_CODE (op2) == NEG)
11070 op2 = XEXP (op2, 0);
11072 *cost += rtx_cost (op0, FMA, 0, speed_p);
11073 *cost += rtx_cost (op1, FMA, 1, speed_p);
11074 *cost += rtx_cost (op2, FMA, 2, speed_p);
11076 if (speed_p)
11077 *cost += extra_cost->fp[mode == DFmode].fma;
11079 return true;
11082 *cost = LIBCALL_COST (3);
11083 return false;
11085 case FIX:
11086 case UNSIGNED_FIX:
11087 if (TARGET_HARD_FLOAT)
11089 if (GET_MODE_CLASS (mode) == MODE_INT)
11091 *cost = COSTS_N_INSNS (1);
11092 if (speed_p)
11093 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11094 /* Strip off the 'cost' of rounding towards zero. */
11095 if (GET_CODE (XEXP (x, 0)) == FIX)
11096 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11097 else
11098 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11099 /* ??? Increase the cost to deal with transferring from
11100 FP -> CORE registers? */
11101 return true;
11103 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11104 && TARGET_FPU_ARMV8)
11106 *cost = COSTS_N_INSNS (1);
11107 if (speed_p)
11108 *cost += extra_cost->fp[mode == DFmode].roundint;
11109 return false;
11111 /* Vector costs? */
11113 *cost = LIBCALL_COST (1);
11114 return false;
11116 case FLOAT:
11117 case UNSIGNED_FLOAT:
11118 if (TARGET_HARD_FLOAT)
11120 /* ??? Increase the cost to deal with transferring from CORE
11121 -> FP registers? */
11122 *cost = COSTS_N_INSNS (1);
11123 if (speed_p)
11124 *cost += extra_cost->fp[mode == DFmode].fromint;
11125 return false;
11127 *cost = LIBCALL_COST (1);
11128 return false;
11130 case CALL:
11131 *cost = COSTS_N_INSNS (1);
11132 return true;
11134 case ASM_OPERANDS:
11136 /* Just a guess. Guess number of instructions in the asm
11137 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11138 though (see PR60663). */
11139 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11140 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11142 *cost = COSTS_N_INSNS (asm_length + num_operands);
11143 return true;
11145 default:
11146 if (mode != VOIDmode)
11147 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11148 else
11149 *cost = COSTS_N_INSNS (4); /* Who knows? */
11150 return false;
11154 #undef HANDLE_NARROW_SHIFT_ARITH
11156 /* Dispatch RTX cost computation to the size or speed variants as appropriate. */
11157 static bool
11158 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11159 int *total, bool speed)
11161 bool result;
11163 if (TARGET_OLD_RTX_COSTS
11164 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11166 /* Old way. (Deprecated.) */
11167 if (!speed)
11168 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11169 (enum rtx_code) outer_code, total);
11170 else
11171 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11172 (enum rtx_code) outer_code, total,
11173 speed);
11175 else
11177 /* New way. */
11178 if (current_tune->insn_extra_cost)
11179 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11180 (enum rtx_code) outer_code,
11181 current_tune->insn_extra_cost,
11182 total, speed);
11183 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11184 && current_tune->insn_extra_cost == NULL */
11185 else
11186 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11187 (enum rtx_code) outer_code,
11188 &generic_extra_costs, total, speed);
11191 if (dump_file && (dump_flags & TDF_DETAILS))
11193 print_rtl_single (dump_file, x);
11194 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11195 *total, result ? "final" : "partial");
11197 return result;
11200 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11201 supported on any "slowmul" cores, so it can be ignored. */
11203 static bool
11204 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11205 int *total, bool speed)
11207 machine_mode mode = GET_MODE (x);
11209 if (TARGET_THUMB)
11211 *total = thumb1_rtx_costs (x, code, outer_code);
11212 return true;
11215 switch (code)
11217 case MULT:
11218 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11219 || mode == DImode)
11221 *total = COSTS_N_INSNS (20);
11222 return false;
11225 if (CONST_INT_P (XEXP (x, 1)))
11227 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11228 & (unsigned HOST_WIDE_INT) 0xffffffff);
11229 int cost, const_ok = const_ok_for_arm (i);
11230 int j, booth_unit_size;
11232 /* Tune as appropriate. */
11233 cost = const_ok ? 4 : 8;
11234 booth_unit_size = 2;
11235 for (j = 0; i && j < 32; j += booth_unit_size)
11237 i >>= booth_unit_size;
11238 cost++;
11241 *total = COSTS_N_INSNS (cost);
11242 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11243 return true;
11246 *total = COSTS_N_INSNS (20);
11247 return false;
11249 default:
11250 return arm_rtx_costs_1 (x, outer_code, total, speed);
11255 /* RTX cost for cores with a fast multiply unit (M variants). */
11257 static bool
11258 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11259 int *total, bool speed)
11261 machine_mode mode = GET_MODE (x);
11263 if (TARGET_THUMB1)
11265 *total = thumb1_rtx_costs (x, code, outer_code);
11266 return true;
11269 /* ??? should thumb2 use different costs? */
11270 switch (code)
11272 case MULT:
11273 /* There is no point basing this on the tuning, since it is always the
11274 fast variant if it exists at all. */
11275 if (mode == DImode
11276 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11277 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11278 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11280 *total = COSTS_N_INSNS (2);
11281 return false;
11285 if (mode == DImode)
11287 *total = COSTS_N_INSNS (5);
11288 return false;
11291 if (CONST_INT_P (XEXP (x, 1)))
11293 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11294 & (unsigned HOST_WIDE_INT) 0xffffffff);
11295 int cost, const_ok = const_ok_for_arm (i);
11296 int j, booth_unit_size;
11298 /* Tune as appropriate. */
11299 cost = const_ok ? 4 : 8;
11300 booth_unit_size = 8;
11301 for (j = 0; i && j < 32; j += booth_unit_size)
11303 i >>= booth_unit_size;
11304 cost++;
11307 *total = COSTS_N_INSNS (cost);
11308 return false;
11311 if (mode == SImode)
11313 *total = COSTS_N_INSNS (4);
11314 return false;
11317 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11319 if (TARGET_HARD_FLOAT
11320 && (mode == SFmode
11321 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11323 *total = COSTS_N_INSNS (1);
11324 return false;
11328 /* Requires a lib call */
11329 *total = COSTS_N_INSNS (20);
11330 return false;
11332 default:
11333 return arm_rtx_costs_1 (x, outer_code, total, speed);
11338 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11339 so it can be ignored. */
11341 static bool
11342 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11343 int *total, bool speed)
11345 machine_mode mode = GET_MODE (x);
11347 if (TARGET_THUMB)
11349 *total = thumb1_rtx_costs (x, code, outer_code);
11350 return true;
11353 switch (code)
11355 case COMPARE:
11356 if (GET_CODE (XEXP (x, 0)) != MULT)
11357 return arm_rtx_costs_1 (x, outer_code, total, speed);
11359 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11360 will stall until the multiplication is complete. */
11361 *total = COSTS_N_INSNS (3);
11362 return false;
11364 case MULT:
11365 /* There is no point basing this on the tuning, since it is always the
11366 fast variant if it exists at all. */
11367 if (mode == DImode
11368 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11369 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11370 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11372 *total = COSTS_N_INSNS (2);
11373 return false;
11377 if (mode == DImode)
11379 *total = COSTS_N_INSNS (5);
11380 return false;
11383 if (CONST_INT_P (XEXP (x, 1)))
11385 /* If operand 1 is a constant we can more accurately
11386 calculate the cost of the multiply. The multiplier can
11387 retire 15 bits on the first cycle and a further 12 on the
11388 second. We do, of course, have to load the constant into
11389 a register first. */
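            /* Purely illustrative walk-through of the computation below:
               for INTVAL == 0x12345678 both (i & 0xffff8000) and
               (i & 0xf8000000) are non-zero, so the local COST ends up
               as 3 and *TOTAL becomes COSTS_N_INSNS (3).  */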
11390 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11391 /* There's a general overhead of one cycle. */
11392 int cost = 1;
11393 unsigned HOST_WIDE_INT masked_const;
11395 if (i & 0x80000000)
11396 i = ~i;
11398 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11400 masked_const = i & 0xffff8000;
11401 if (masked_const != 0)
11403 cost++;
11404 masked_const = i & 0xf8000000;
11405 if (masked_const != 0)
11406 cost++;
11408 *total = COSTS_N_INSNS (cost);
11409 return false;
11412 if (mode == SImode)
11414 *total = COSTS_N_INSNS (3);
11415 return false;
11418 /* Requires a lib call */
11419 *total = COSTS_N_INSNS (20);
11420 return false;
11422 default:
11423 return arm_rtx_costs_1 (x, outer_code, total, speed);
11428 /* RTX costs for 9e (and later) cores. */
11430 static bool
11431 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11432 int *total, bool speed)
11434 machine_mode mode = GET_MODE (x);
11436 if (TARGET_THUMB1)
11438 switch (code)
11440 case MULT:
11441 /* Small multiply: 32 cycles for an integer multiply inst. */
11442 if (arm_arch6m && arm_m_profile_small_mul)
11443 *total = COSTS_N_INSNS (32);
11444 else
11445 *total = COSTS_N_INSNS (3);
11446 return true;
11448 default:
11449 *total = thumb1_rtx_costs (x, code, outer_code);
11450 return true;
11454 switch (code)
11456 case MULT:
11457 /* There is no point basing this on the tuning, since it is always the
11458 fast variant if it exists at all. */
11459 if (mode == DImode
11460 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11461 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11462 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11464 *total = COSTS_N_INSNS (2);
11465 return false;
11469 if (mode == DImode)
11471 *total = COSTS_N_INSNS (5);
11472 return false;
11475 if (mode == SImode)
11477 *total = COSTS_N_INSNS (2);
11478 return false;
11481 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11483 if (TARGET_HARD_FLOAT
11484 && (mode == SFmode
11485 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11487 *total = COSTS_N_INSNS (1);
11488 return false;
11492 *total = COSTS_N_INSNS (20);
11493 return false;
11495 default:
11496 return arm_rtx_costs_1 (x, outer_code, total, speed);
11499 /* All address computations that can be done are essentially free, but
11500 rtx_cost returns the same value for practically all of them. So we weight
11501 the different types of address here in preference order (most preferred
11502 first): PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
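/* Concretely, the function below returns 0 for auto-increment/decrement
   addresses, 2 for reg+constant, 3 for a sum involving another arithmetic
   term, 4 for other sums, 6 for anything else (e.g. a plain register) and
   10 for a MEM, LABEL_REF or SYMBOL_REF address.  */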
11503 static inline int
11504 arm_arm_address_cost (rtx x)
11506 enum rtx_code c = GET_CODE (x);
11508 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11509 return 0;
11510 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11511 return 10;
11513 if (c == PLUS)
11515 if (CONST_INT_P (XEXP (x, 1)))
11516 return 2;
11518 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11519 return 3;
11521 return 4;
11524 return 6;
11527 static inline int
11528 arm_thumb_address_cost (rtx x)
11530 enum rtx_code c = GET_CODE (x);
11532 if (c == REG)
11533 return 1;
11534 if (c == PLUS
11535 && REG_P (XEXP (x, 0))
11536 && CONST_INT_P (XEXP (x, 1)))
11537 return 1;
11539 return 2;
11542 static int
11543 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11544 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11546 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11549 /* Adjust cost hook for XScale. */
11550 static bool
11551 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11553 /* Some true dependencies can have a higher cost depending
11554 on precisely how certain input operands are used. */
11555 if (REG_NOTE_KIND(link) == 0
11556 && recog_memoized (insn) >= 0
11557 && recog_memoized (dep) >= 0)
11559 int shift_opnum = get_attr_shift (insn);
11560 enum attr_type attr_type = get_attr_type (dep);
11562 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11563 operand for INSN. If we have a shifted input operand and the
11564 instruction we depend on is another ALU instruction, then we may
11565 have to account for an additional stall. */
11566 if (shift_opnum != 0
11567 && (attr_type == TYPE_ALU_SHIFT_IMM
11568 || attr_type == TYPE_ALUS_SHIFT_IMM
11569 || attr_type == TYPE_LOGIC_SHIFT_IMM
11570 || attr_type == TYPE_LOGICS_SHIFT_IMM
11571 || attr_type == TYPE_ALU_SHIFT_REG
11572 || attr_type == TYPE_ALUS_SHIFT_REG
11573 || attr_type == TYPE_LOGIC_SHIFT_REG
11574 || attr_type == TYPE_LOGICS_SHIFT_REG
11575 || attr_type == TYPE_MOV_SHIFT
11576 || attr_type == TYPE_MVN_SHIFT
11577 || attr_type == TYPE_MOV_SHIFT_REG
11578 || attr_type == TYPE_MVN_SHIFT_REG))
11580 rtx shifted_operand;
11581 int opno;
11583 /* Get the shifted operand. */
11584 extract_insn (insn);
11585 shifted_operand = recog_data.operand[shift_opnum];
11587 /* Iterate over all the operands in DEP. If we write an operand
11588 that overlaps with SHIFTED_OPERAND, then we have to increase the
11589 cost of this dependency. */
11590 extract_insn (dep);
11591 preprocess_constraints (dep);
11592 for (opno = 0; opno < recog_data.n_operands; opno++)
11594 /* We can ignore strict inputs. */
11595 if (recog_data.operand_type[opno] == OP_IN)
11596 continue;
11598 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11599 shifted_operand))
11601 *cost = 2;
11602 return false;
11607 return true;
11610 /* Adjust cost hook for Cortex A9. */
11611 static bool
11612 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11614 switch (REG_NOTE_KIND (link))
11616 case REG_DEP_ANTI:
11617 *cost = 0;
11618 return false;
11620 case REG_DEP_TRUE:
11621 case REG_DEP_OUTPUT:
11622 if (recog_memoized (insn) >= 0
11623 && recog_memoized (dep) >= 0)
11625 if (GET_CODE (PATTERN (insn)) == SET)
11627 if (GET_MODE_CLASS
11628 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11629 || GET_MODE_CLASS
11630 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11632 enum attr_type attr_type_insn = get_attr_type (insn);
11633 enum attr_type attr_type_dep = get_attr_type (dep);
11635 /* By default all dependencies of the form
11636 s0 = s0 <op> s1
11637 s0 = s0 <op> s2
11638 have an extra latency of 1 cycle because
11639 of the input and output dependency in this
11640 case. However this gets modeled as a true
11641 dependency and hence all these checks. */
11642 if (REG_P (SET_DEST (PATTERN (insn)))
11643 && REG_P (SET_DEST (PATTERN (dep)))
11644 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11645 SET_DEST (PATTERN (dep))))
11647 /* FMACS is a special case where the dependent
11648 instruction can be issued 3 cycles before
11649 the normal latency in case of an output
11650 dependency. */
11651 if ((attr_type_insn == TYPE_FMACS
11652 || attr_type_insn == TYPE_FMACD)
11653 && (attr_type_dep == TYPE_FMACS
11654 || attr_type_dep == TYPE_FMACD))
11656 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11657 *cost = insn_default_latency (dep) - 3;
11658 else
11659 *cost = insn_default_latency (dep);
11660 return false;
11662 else
11664 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11665 *cost = insn_default_latency (dep) + 1;
11666 else
11667 *cost = insn_default_latency (dep);
11669 return false;
11674 break;
11676 default:
11677 gcc_unreachable ();
11680 return true;
11683 /* Adjust cost hook for FA726TE. */
11684 static bool
11685 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11687 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11688 by a predicated one) has a penalty of 3. */
11689 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11690 && recog_memoized (insn) >= 0
11691 && recog_memoized (dep) >= 0
11692 && get_attr_conds (dep) == CONDS_SET)
11694 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11695 if (get_attr_conds (insn) == CONDS_USE
11696 && get_attr_type (insn) != TYPE_BRANCH)
11698 *cost = 3;
11699 return false;
11702 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11703 || get_attr_conds (insn) == CONDS_USE)
11705 *cost = 0;
11706 return false;
11710 return true;
11713 /* Implement TARGET_REGISTER_MOVE_COST.
11715 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11716 such a move is typically more expensive than a single memory access. We set
11717 the cost to less than two memory accesses so that floating
11718 point to integer conversion does not go through memory. */
11721 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11722 reg_class_t from, reg_class_t to)
11724 if (TARGET_32BIT)
11726 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11727 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11728 return 15;
11729 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11730 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11731 return 4;
11732 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11733 return 20;
11734 else
11735 return 2;
11737 else
11739 if (from == HI_REGS || to == HI_REGS)
11740 return 4;
11741 else
11742 return 2;
11746 /* Implement TARGET_MEMORY_MOVE_COST. */
11749 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11750 bool in ATTRIBUTE_UNUSED)
11752 if (TARGET_32BIT)
11753 return 10;
11754 else
11756 if (GET_MODE_SIZE (mode) < 4)
11757 return 8;
11758 else
11759 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11763 /* Vectorizer cost model implementation. */
11765 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11766 static int
11767 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11768 tree vectype,
11769 int misalign ATTRIBUTE_UNUSED)
11771 unsigned elements;
11773 switch (type_of_cost)
11775 case scalar_stmt:
11776 return current_tune->vec_costs->scalar_stmt_cost;
11778 case scalar_load:
11779 return current_tune->vec_costs->scalar_load_cost;
11781 case scalar_store:
11782 return current_tune->vec_costs->scalar_store_cost;
11784 case vector_stmt:
11785 return current_tune->vec_costs->vec_stmt_cost;
11787 case vector_load:
11788 return current_tune->vec_costs->vec_align_load_cost;
11790 case vector_store:
11791 return current_tune->vec_costs->vec_store_cost;
11793 case vec_to_scalar:
11794 return current_tune->vec_costs->vec_to_scalar_cost;
11796 case scalar_to_vec:
11797 return current_tune->vec_costs->scalar_to_vec_cost;
11799 case unaligned_load:
11800 return current_tune->vec_costs->vec_unalign_load_cost;
11802 case unaligned_store:
11803 return current_tune->vec_costs->vec_unalign_store_cost;
11805 case cond_branch_taken:
11806 return current_tune->vec_costs->cond_taken_branch_cost;
11808 case cond_branch_not_taken:
11809 return current_tune->vec_costs->cond_not_taken_branch_cost;
11811 case vec_perm:
11812 case vec_promote_demote:
11813 return current_tune->vec_costs->vec_stmt_cost;
11815 case vec_construct:
11816 elements = TYPE_VECTOR_SUBPARTS (vectype);
11817 return elements / 2 + 1;
11819 default:
11820 gcc_unreachable ();
11824 /* Implement targetm.vectorize.add_stmt_cost. */
11826 static unsigned
11827 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11828 struct _stmt_vec_info *stmt_info, int misalign,
11829 enum vect_cost_model_location where)
11831 unsigned *cost = (unsigned *) data;
11832 unsigned retval = 0;
11834 if (flag_vect_cost_model)
11836 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11837 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11839 /* Statements in an inner loop relative to the loop being
11840 vectorized are weighted more heavily. The value here is
11841 arbitrary and could potentially be improved with analysis. */
11842 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11843 count *= 50; /* FIXME. */
11845 retval = (unsigned) (count * stmt_cost);
11846 cost[where] += retval;
11849 return retval;
11852 /* Return true if and only if this insn can dual-issue only as older. */
11853 static bool
11854 cortexa7_older_only (rtx_insn *insn)
11856 if (recog_memoized (insn) < 0)
11857 return false;
11859 switch (get_attr_type (insn))
11861 case TYPE_ALU_DSP_REG:
11862 case TYPE_ALU_SREG:
11863 case TYPE_ALUS_SREG:
11864 case TYPE_LOGIC_REG:
11865 case TYPE_LOGICS_REG:
11866 case TYPE_ADC_REG:
11867 case TYPE_ADCS_REG:
11868 case TYPE_ADR:
11869 case TYPE_BFM:
11870 case TYPE_REV:
11871 case TYPE_MVN_REG:
11872 case TYPE_SHIFT_IMM:
11873 case TYPE_SHIFT_REG:
11874 case TYPE_LOAD_BYTE:
11875 case TYPE_LOAD1:
11876 case TYPE_STORE1:
11877 case TYPE_FFARITHS:
11878 case TYPE_FADDS:
11879 case TYPE_FFARITHD:
11880 case TYPE_FADDD:
11881 case TYPE_FMOV:
11882 case TYPE_F_CVT:
11883 case TYPE_FCMPS:
11884 case TYPE_FCMPD:
11885 case TYPE_FCONSTS:
11886 case TYPE_FCONSTD:
11887 case TYPE_FMULS:
11888 case TYPE_FMACS:
11889 case TYPE_FMULD:
11890 case TYPE_FMACD:
11891 case TYPE_FDIVS:
11892 case TYPE_FDIVD:
11893 case TYPE_F_MRC:
11894 case TYPE_F_MRRC:
11895 case TYPE_F_FLAG:
11896 case TYPE_F_LOADS:
11897 case TYPE_F_STORES:
11898 return true;
11899 default:
11900 return false;
11904 /* Return true if and only if this insn can dual-issue as younger. */
11905 static bool
11906 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11908 if (recog_memoized (insn) < 0)
11910 if (verbose > 5)
11911 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11912 return false;
11915 switch (get_attr_type (insn))
11917 case TYPE_ALU_IMM:
11918 case TYPE_ALUS_IMM:
11919 case TYPE_LOGIC_IMM:
11920 case TYPE_LOGICS_IMM:
11921 case TYPE_EXTEND:
11922 case TYPE_MVN_IMM:
11923 case TYPE_MOV_IMM:
11924 case TYPE_MOV_REG:
11925 case TYPE_MOV_SHIFT:
11926 case TYPE_MOV_SHIFT_REG:
11927 case TYPE_BRANCH:
11928 case TYPE_CALL:
11929 return true;
11930 default:
11931 return false;
11936 /* Look for an instruction that can dual issue only as an older
11937 instruction, and move it in front of any instructions that can
11938 dual-issue as younger, while preserving the relative order of all
11939 other instructions in the ready list. This is a heuristic to help
11940 dual-issue in later cycles, by postponing issue of more flexible
11941 instructions. This heuristic may affect dual issue opportunities
11942 in the current cycle. */
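/* Illustrative example (not from the source): if the ready list holds a
   MOV-immediate (which cortexa7_younger accepts) ahead of an FADDS (which
   cortexa7_older_only accepts), the FADDS is moved in front of the MOV, so
   the MOV remains available to fill the younger slot of a dual-issue pair
   in a later cycle.  */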
11943 static void
11944 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11945 int *n_readyp, int clock)
11947 int i;
11948 int first_older_only = -1, first_younger = -1;
11950 if (verbose > 5)
11951 fprintf (file,
11952 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11953 clock,
11954 *n_readyp);
11956 /* Traverse the ready list from the head (the instruction to issue
11957 first), looking for the first instruction that can issue as
11958 younger and the first instruction that can dual-issue only as
11959 older. */
11960 for (i = *n_readyp - 1; i >= 0; i--)
11962 rtx_insn *insn = ready[i];
11963 if (cortexa7_older_only (insn))
11965 first_older_only = i;
11966 if (verbose > 5)
11967 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11968 break;
11970 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11971 first_younger = i;
11974 /* Nothing to reorder because either no younger insn found or insn
11975 that can dual-issue only as older appears before any insn that
11976 can dual-issue as younger. */
11977 if (first_younger == -1)
11979 if (verbose > 5)
11980 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11981 return;
11984 /* Nothing to reorder because no older-only insn in the ready list. */
11985 if (first_older_only == -1)
11987 if (verbose > 5)
11988 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11989 return;
11992 /* Move first_older_only insn before first_younger. */
11993 if (verbose > 5)
11994 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11995 INSN_UID(ready [first_older_only]),
11996 INSN_UID(ready [first_younger]));
11997 rtx_insn *first_older_only_insn = ready [first_older_only];
11998 for (i = first_older_only; i < first_younger; i++)
12000 ready[i] = ready[i+1];
12003 ready[i] = first_older_only_insn;
12004 return;
12007 /* Implement TARGET_SCHED_REORDER. */
12008 static int
12009 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12010 int clock)
12012 switch (arm_tune)
12014 case cortexa7:
12015 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12016 break;
12017 default:
12018 /* Do nothing for other cores. */
12019 break;
12022 return arm_issue_rate ();
12025 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12026 It corrects the value of COST based on the relationship between
12027 INSN and DEP through the dependence LINK. It returns the new
12028 value. There is a per-core adjust_cost hook to adjust scheduler costs
12029 and the per-core hook can choose to completely override the generic
12030 adjust_cost function. Only put bits of code into arm_adjust_cost that
12031 are common across all cores. */
12032 static int
12033 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12035 rtx i_pat, d_pat;
12037 /* When generating Thumb-1 code, we want to place flag-setting operations
12038 close to a conditional branch which depends on them, so that we can
12039 omit the comparison. */
12040 if (TARGET_THUMB1
12041 && REG_NOTE_KIND (link) == 0
12042 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12043 && recog_memoized (dep) >= 0
12044 && get_attr_conds (dep) == CONDS_SET)
12045 return 0;
12047 if (current_tune->sched_adjust_cost != NULL)
12049 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12050 return cost;
12053 /* XXX Is this strictly true? */
12054 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12055 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12056 return 0;
12058 /* Call insns don't incur a stall, even if they follow a load. */
12059 if (REG_NOTE_KIND (link) == 0
12060 && CALL_P (insn))
12061 return 1;
12063 if ((i_pat = single_set (insn)) != NULL
12064 && MEM_P (SET_SRC (i_pat))
12065 && (d_pat = single_set (dep)) != NULL
12066 && MEM_P (SET_DEST (d_pat)))
12068 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12069 /* This is a load after a store; there is no conflict if the load reads
12070 from a cached area. Assume that loads from the stack, and from the
12071 constant pool are cached, and that others will miss. This is a
12072 hack. */
12074 if ((GET_CODE (src_mem) == SYMBOL_REF
12075 && CONSTANT_POOL_ADDRESS_P (src_mem))
12076 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12077 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12078 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12079 return 1;
12082 return cost;
12086 arm_max_conditional_execute (void)
12088 return max_insns_skipped;
12091 static int
12092 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12094 if (TARGET_32BIT)
12095 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12096 else
12097 return (optimize > 0) ? 2 : 0;
12100 static int
12101 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12103 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12106 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12107 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12108 sequences of non-executed instructions in IT blocks probably take the same
12109 amount of time as executed instructions (and the IT instruction itself takes
12110 space in icache). This function was experimentally determined to give good
12111 results on a popular embedded benchmark. */
12113 static int
12114 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12116 return (TARGET_32BIT && speed_p) ? 1
12117 : arm_default_branch_cost (speed_p, predictable_p);
12120 static int
12121 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12123 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12126 static bool fp_consts_inited = false;
12128 static REAL_VALUE_TYPE value_fp0;
12130 static void
12131 init_fp_table (void)
12133 REAL_VALUE_TYPE r;
12135 r = REAL_VALUE_ATOF ("0", DFmode);
12136 value_fp0 = r;
12137 fp_consts_inited = true;
12140 /* Return TRUE if rtx X is a valid immediate FP constant. */
12142 arm_const_double_rtx (rtx x)
12144 REAL_VALUE_TYPE r;
12146 if (!fp_consts_inited)
12147 init_fp_table ();
12149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12150 if (REAL_VALUE_MINUS_ZERO (r))
12151 return 0;
12153 if (REAL_VALUES_EQUAL (r, value_fp0))
12154 return 1;
12156 return 0;
12159 /* VFPv3 has a fairly wide range of representable immediates, formed from
12160 "quarter-precision" floating-point values. These can be evaluated using this
12161 formula (with ^ for exponentiation):
12163 -1^s * n * 2^-r
12165 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12166 16 <= n <= 31 and 0 <= r <= 7.
12168 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12170 - A (most-significant) is the sign bit.
12171 - BCD are the exponent (encoded as r XOR 3).
12172 - EFGH are the mantissa (encoded as n - 16).
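/* Worked example (illustrative): 1.0 is (-1)^0 * 16 * 2^-4, i.e. s = 0,
   n = 16, r = 4.  That encodes as A = 0, BCD = r XOR 3 = 0b111 and
   EFGH = n - 16 = 0b0000, giving the 8-bit index 0x70, which is the value
   vfp3_const_double_index below computes for CONST_DOUBLE 1.0.  */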
12175 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12176 fconst[sd] instruction, or -1 if X isn't suitable. */
12177 static int
12178 vfp3_const_double_index (rtx x)
12180 REAL_VALUE_TYPE r, m;
12181 int sign, exponent;
12182 unsigned HOST_WIDE_INT mantissa, mant_hi;
12183 unsigned HOST_WIDE_INT mask;
12184 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12185 bool fail;
12187 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12188 return -1;
12190 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12192 /* We can't represent these things, so detect them first. */
12193 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12194 return -1;
12196 /* Extract sign, exponent and mantissa. */
12197 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12198 r = real_value_abs (&r);
12199 exponent = REAL_EXP (&r);
12200 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12201 highest (sign) bit, with a fixed binary point at bit point_pos.
12202 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12203 bits for the mantissa, this may fail (low bits would be lost). */
12204 real_ldexp (&m, &r, point_pos - exponent);
12205 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12206 mantissa = w.elt (0);
12207 mant_hi = w.elt (1);
12209 /* If there are bits set in the low part of the mantissa, we can't
12210 represent this value. */
12211 if (mantissa != 0)
12212 return -1;
12214 /* Now make it so that mantissa contains the most-significant bits, and move
12215 the point_pos to indicate that the least-significant bits have been
12216 discarded. */
12217 point_pos -= HOST_BITS_PER_WIDE_INT;
12218 mantissa = mant_hi;
12220 /* We can permit four significant bits of mantissa only, plus a high bit
12221 which is always 1. */
12222 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12223 if ((mantissa & mask) != 0)
12224 return -1;
12226 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12227 mantissa >>= point_pos - 5;
12229 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12230 floating-point immediate zero with Neon using an integer-zero load, but
12231 that case is handled elsewhere.) */
12232 if (mantissa == 0)
12233 return -1;
12235 gcc_assert (mantissa >= 16 && mantissa <= 31);
12237 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12238 normalized significands are in the range [1, 2). (Our mantissa is shifted
12239 left 4 places at this point relative to normalized IEEE754 values). GCC
12240 internally uses [0.5, 1) (see real.c), so the exponent returned from
12241 REAL_EXP must be altered. */
12242 exponent = 5 - exponent;
12244 if (exponent < 0 || exponent > 7)
12245 return -1;
12247 /* Sign, mantissa and exponent are now in the correct form to plug into the
12248 formula described in the comment above. */
12249 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12252 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12254 vfp3_const_double_rtx (rtx x)
12256 if (!TARGET_VFP3)
12257 return 0;
12259 return vfp3_const_double_index (x) != -1;
12262 /* Recognize immediates which can be used in various Neon instructions. Legal
12263 immediates are described by the following table (for VMVN variants, the
12264 bitwise inverse of the constant shown is recognized. In either case, VMOV
12265 is output and the correct instruction to use for a given constant is chosen
12266 by the assembler). The constant shown is replicated across all elements of
12267 the destination vector.
12269 insn elems variant constant (binary)
12270 ---- ----- ------- -----------------
12271 vmov i32 0 00000000 00000000 00000000 abcdefgh
12272 vmov i32 1 00000000 00000000 abcdefgh 00000000
12273 vmov i32 2 00000000 abcdefgh 00000000 00000000
12274 vmov i32 3 abcdefgh 00000000 00000000 00000000
12275 vmov i16 4 00000000 abcdefgh
12276 vmov i16 5 abcdefgh 00000000
12277 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12278 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12279 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12280 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12281 vmvn i16 10 00000000 abcdefgh
12282 vmvn i16 11 abcdefgh 00000000
12283 vmov i32 12 00000000 00000000 abcdefgh 11111111
12284 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12285 vmov i32 14 00000000 abcdefgh 11111111 11111111
12286 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12287 vmov i8 16 abcdefgh
12288 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12289 eeeeeeee ffffffff gggggggg hhhhhhhh
12290 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12291 vmov f32 19 00000000 00000000 00000000 00000000
12293 For case 18, B = !b. Representable values are exactly those accepted by
12294 vfp3_const_double_index, but are output as floating-point numbers rather
12295 than indices.
12297 For case 19, we will change it to vmov.i32 when assembling.
12299 Variants 0-5 (inclusive) may also be used as immediates for the second
12300 operand of VORR/VBIC instructions.
12302 The INVERSE argument causes the bitwise inverse of the given operand to be
12303 recognized instead (used for recognizing legal immediates for the VAND/VORN
12304 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12305 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12306 output, rather than the real insns vbic/vorr).
12308 INVERSE makes no difference to the recognition of float vectors.
12310 The return value is the variant of immediate as shown in the above table, or
12311 -1 if the given value doesn't match any of the listed patterns.
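/* Illustrative example: a V4SImode CONST_VECTOR whose four elements are all
   0x000000AB matches variant 0, so this function returns 0 with
   *ELEMENTWIDTH = 32 and *MODCONST = (const_int 0xAB); the eventual output
   is "vmov.i32 q0, #0xab".  A vector whose elements are 0xFFFFFF54 (the
   bitwise inverse) matches the VMVN form, variant 6.  */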
12313 static int
12314 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12315 rtx *modconst, int *elementwidth)
12317 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12318 matches = 1; \
12319 for (i = 0; i < idx; i += (STRIDE)) \
12320 if (!(TEST)) \
12321 matches = 0; \
12322 if (matches) \
12324 immtype = (CLASS); \
12325 elsize = (ELSIZE); \
12326 break; \
12329 unsigned int i, elsize = 0, idx = 0, n_elts;
12330 unsigned int innersize;
12331 unsigned char bytes[16];
12332 int immtype = -1, matches;
12333 unsigned int invmask = inverse ? 0xff : 0;
12334 bool vector = GET_CODE (op) == CONST_VECTOR;
12336 if (vector)
12338 n_elts = CONST_VECTOR_NUNITS (op);
12339 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12341 else
12343 n_elts = 1;
12344 if (mode == VOIDmode)
12345 mode = DImode;
12346 innersize = GET_MODE_SIZE (mode);
12349 /* Vectors of float constants. */
12350 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12352 rtx el0 = CONST_VECTOR_ELT (op, 0);
12353 REAL_VALUE_TYPE r0;
12355 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12356 return -1;
12358 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12360 for (i = 1; i < n_elts; i++)
12362 rtx elt = CONST_VECTOR_ELT (op, i);
12363 REAL_VALUE_TYPE re;
12365 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12367 if (!REAL_VALUES_EQUAL (r0, re))
12368 return -1;
12371 if (modconst)
12372 *modconst = CONST_VECTOR_ELT (op, 0);
12374 if (elementwidth)
12375 *elementwidth = 0;
12377 if (el0 == CONST0_RTX (GET_MODE (el0)))
12378 return 19;
12379 else
12380 return 18;
12383 /* Splat vector constant out into a byte vector. */
12384 for (i = 0; i < n_elts; i++)
12386 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12387 unsigned HOST_WIDE_INT elpart;
12388 unsigned int part, parts;
12390 if (CONST_INT_P (el))
12392 elpart = INTVAL (el);
12393 parts = 1;
12395 else if (CONST_DOUBLE_P (el))
12397 elpart = CONST_DOUBLE_LOW (el);
12398 parts = 2;
12400 else
12401 gcc_unreachable ();
12403 for (part = 0; part < parts; part++)
12405 unsigned int byte;
12406 for (byte = 0; byte < innersize; byte++)
12408 bytes[idx++] = (elpart & 0xff) ^ invmask;
12409 elpart >>= BITS_PER_UNIT;
12411 if (CONST_DOUBLE_P (el))
12412 elpart = CONST_DOUBLE_HIGH (el);
12416 /* Sanity check. */
12417 gcc_assert (idx == GET_MODE_SIZE (mode));
12421 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12422 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12424 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12425 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12427 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12428 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12430 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12431 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12433 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12435 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12437 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12438 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12440 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12441 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12443 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12444 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12446 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12447 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12449 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12451 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12453 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12454 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12456 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12457 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12459 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12460 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12462 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12463 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12465 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12467 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12468 && bytes[i] == bytes[(i + 8) % idx]);
12470 while (0);
12472 if (immtype == -1)
12473 return -1;
12475 if (elementwidth)
12476 *elementwidth = elsize;
12478 if (modconst)
12480 unsigned HOST_WIDE_INT imm = 0;
12482 /* Un-invert bytes of recognized vector, if necessary. */
12483 if (invmask != 0)
12484 for (i = 0; i < idx; i++)
12485 bytes[i] ^= invmask;
12487 if (immtype == 17)
12489 /* FIXME: Broken on 32-bit H_W_I hosts. */
12490 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12492 for (i = 0; i < 8; i++)
12493 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12494 << (i * BITS_PER_UNIT);
12496 *modconst = GEN_INT (imm);
12498 else
12500 unsigned HOST_WIDE_INT imm = 0;
12502 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12503 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12505 *modconst = GEN_INT (imm);
12509 return immtype;
12510 #undef CHECK
12513 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12514 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12515 float elements), and a modified constant (whatever should be output for a
12516 VMOV) in *MODCONST. */
12519 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12520 rtx *modconst, int *elementwidth)
12522 rtx tmpconst;
12523 int tmpwidth;
12524 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12526 if (retval == -1)
12527 return 0;
12529 if (modconst)
12530 *modconst = tmpconst;
12532 if (elementwidth)
12533 *elementwidth = tmpwidth;
12535 return 1;
12538 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12539 the immediate is valid, write a constant suitable for using as an operand
12540 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12541 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12544 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12545 rtx *modconst, int *elementwidth)
12547 rtx tmpconst;
12548 int tmpwidth;
12549 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12551 if (retval < 0 || retval > 5)
12552 return 0;
12554 if (modconst)
12555 *modconst = tmpconst;
12557 if (elementwidth)
12558 *elementwidth = tmpwidth;
12560 return 1;
12563 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12564 the immediate is valid, write a constant suitable for using as an operand
12565 to VSHR/VSHL to *MODCONST and the corresponding element width to
12566 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether the shift is a left shift,
12567 because left and right shifts have different limitations. */
12570 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12571 rtx *modconst, int *elementwidth,
12572 bool isleftshift)
12574 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12575 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12576 unsigned HOST_WIDE_INT last_elt = 0;
12577 unsigned HOST_WIDE_INT maxshift;
12579 /* Check that all elements hold the same (CONST_INT) shift amount. */
12580 for (i = 0; i < n_elts; i++)
12582 rtx el = CONST_VECTOR_ELT (op, i);
12583 unsigned HOST_WIDE_INT elpart;
12585 if (CONST_INT_P (el))
12586 elpart = INTVAL (el);
12587 else if (CONST_DOUBLE_P (el))
12588 return 0;
12589 else
12590 gcc_unreachable ();
12592 if (i != 0 && elpart != last_elt)
12593 return 0;
12595 last_elt = elpart;
12598 /* Shift less than element size. */
12599 maxshift = innersize * 8;
12601 if (isleftshift)
12603 /* Left shift immediate value can be from 0 to <size>-1. */
12604 if (last_elt >= maxshift)
12605 return 0;
12607 else
12609 /* Right shift immediate value can be from 1 to <size>. */
12610 if (last_elt == 0 || last_elt > maxshift)
12611 return 0;
12614 if (elementwidth)
12615 *elementwidth = innersize * 8;
12617 if (modconst)
12618 *modconst = CONST_VECTOR_ELT (op, 0);
12620 return 1;
12623 /* Return a string suitable for output of Neon immediate logic operation
12624 MNEM. */
12626 char *
12627 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12628 int inverse, int quad)
12630 int width, is_valid;
12631 static char templ[40];
12633 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12635 gcc_assert (is_valid != 0);
12637 if (quad)
12638 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12639 else
12640 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12642 return templ;
12645 /* Return a string suitable for output of Neon immediate shift operation
12646 (VSHR or VSHL) MNEM. */
12648 char *
12649 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12650 machine_mode mode, int quad,
12651 bool isleftshift)
12653 int width, is_valid;
12654 static char templ[40];
12656 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12657 gcc_assert (is_valid != 0);
12659 if (quad)
12660 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12661 else
12662 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12664 return templ;
12667 /* Output a sequence of pairwise operations to implement a reduction.
12668 NOTE: We do "too much work" here, because pairwise operations work on two
12669 registers-worth of operands in one go. Unfortunately we don't think we can
12670 exploit those extra calculations to do the full operation in fewer steps.
12671 Although all vector elements of the result but the first are ignored, we
12672 actually calculate the same result in each of the elements. An alternative
12673 such as initially loading a vector with zero to use as each of the second
12674 operands would use up an additional register and take an extra instruction,
12675 for no particular gain. */
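/* Illustrative example: for a V4HImode OP1 = {a, b, c, d} and a pairwise-add
   REDUC, parts = 4, so the loop below runs twice:
     i == 2: tmpsum = {a+b, c+d, a+b, c+d}
     i == 1: op0    = {a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d}
   Element 0 of OP0 is the reduction result; as noted above, the remaining
   elements end up holding the same value.  */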
12677 void
12678 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12679 rtx (*reduc) (rtx, rtx, rtx))
12681 machine_mode inner = GET_MODE_INNER (mode);
12682 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12683 rtx tmpsum = op1;
12685 for (i = parts / 2; i >= 1; i /= 2)
12687 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12688 emit_insn (reduc (dest, tmpsum, tmpsum));
12689 tmpsum = dest;
12693 /* If VALS is a vector constant that can be loaded into a register
12694 using VDUP, generate instructions to do so and return an RTX to
12695 assign to the register. Otherwise return NULL_RTX. */
12697 static rtx
12698 neon_vdup_constant (rtx vals)
12700 machine_mode mode = GET_MODE (vals);
12701 machine_mode inner_mode = GET_MODE_INNER (mode);
12702 int n_elts = GET_MODE_NUNITS (mode);
12703 bool all_same = true;
12704 rtx x;
12705 int i;
12707 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12708 return NULL_RTX;
12710 for (i = 0; i < n_elts; ++i)
12712 x = XVECEXP (vals, 0, i);
12713 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12714 all_same = false;
12717 if (!all_same)
12718 /* The elements are not all the same. We could handle repeating
12719 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12720 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12721 vdup.i16). */
12722 return NULL_RTX;
12724 /* We can load this constant by using VDUP and a constant in a
12725 single ARM register. This will be cheaper than a vector
12726 load. */
12728 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12729 return gen_rtx_VEC_DUPLICATE (mode, x);
12732 /* Generate code to load VALS, which is a PARALLEL containing only
12733 constants (for vec_init) or CONST_VECTOR, efficiently into a
12734 register. Returns an RTX to copy into the register, or NULL_RTX
12735 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12738 neon_make_constant (rtx vals)
12740 machine_mode mode = GET_MODE (vals);
12741 rtx target;
12742 rtx const_vec = NULL_RTX;
12743 int n_elts = GET_MODE_NUNITS (mode);
12744 int n_const = 0;
12745 int i;
12747 if (GET_CODE (vals) == CONST_VECTOR)
12748 const_vec = vals;
12749 else if (GET_CODE (vals) == PARALLEL)
12751 /* A CONST_VECTOR must contain only CONST_INTs and
12752 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12753 Only store valid constants in a CONST_VECTOR. */
12754 for (i = 0; i < n_elts; ++i)
12756 rtx x = XVECEXP (vals, 0, i);
12757 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12758 n_const++;
12760 if (n_const == n_elts)
12761 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12763 else
12764 gcc_unreachable ();
12766 if (const_vec != NULL
12767 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12768 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12769 return const_vec;
12770 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12771 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12772 pipeline cycle; creating the constant takes one or two ARM
12773 pipeline cycles. */
12774 return target;
12775 else if (const_vec != NULL_RTX)
12776 /* Load from constant pool. On Cortex-A8 this takes two cycles
12777 (for either double or quad vectors). We can not take advantage
12778 of single-cycle VLD1 because we need a PC-relative addressing
12779 mode. */
12780 return const_vec;
12781 else
12782 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12783 We can not construct an initializer. */
12784 return NULL_RTX;
12787 /* Initialize vector TARGET to VALS. */
12789 void
12790 neon_expand_vector_init (rtx target, rtx vals)
12792 machine_mode mode = GET_MODE (target);
12793 machine_mode inner_mode = GET_MODE_INNER (mode);
12794 int n_elts = GET_MODE_NUNITS (mode);
12795 int n_var = 0, one_var = -1;
12796 bool all_same = true;
12797 rtx x, mem;
12798 int i;
12800 for (i = 0; i < n_elts; ++i)
12802 x = XVECEXP (vals, 0, i);
12803 if (!CONSTANT_P (x))
12804 ++n_var, one_var = i;
12806 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12807 all_same = false;
12810 if (n_var == 0)
12812 rtx constant = neon_make_constant (vals);
12813 if (constant != NULL_RTX)
12815 emit_move_insn (target, constant);
12816 return;
12820 /* Splat a single non-constant element if we can. */
12821 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12823 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12824 emit_insn (gen_rtx_SET (VOIDmode, target,
12825 gen_rtx_VEC_DUPLICATE (mode, x)));
12826 return;
12829 /* One field is non-constant. Load constant then overwrite varying
12830 field. This is more efficient than using the stack. */
12831 if (n_var == 1)
12833 rtx copy = copy_rtx (vals);
12834 rtx index = GEN_INT (one_var);
12836 /* Load constant part of vector, substitute neighboring value for
12837 varying element. */
12838 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12839 neon_expand_vector_init (target, copy);
12841 /* Insert variable. */
12842 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12843 switch (mode)
12845 case V8QImode:
12846 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12847 break;
12848 case V16QImode:
12849 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12850 break;
12851 case V4HImode:
12852 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12853 break;
12854 case V8HImode:
12855 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12856 break;
12857 case V2SImode:
12858 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12859 break;
12860 case V4SImode:
12861 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12862 break;
12863 case V2SFmode:
12864 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12865 break;
12866 case V4SFmode:
12867 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12868 break;
12869 case V2DImode:
12870 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12871 break;
12872 default:
12873 gcc_unreachable ();
12875 return;
12878 /* Construct the vector in memory one field at a time
12879 and load the whole vector. */
12880 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12881 for (i = 0; i < n_elts; i++)
12882 emit_move_insn (adjust_address_nv (mem, inner_mode,
12883 i * GET_MODE_SIZE (inner_mode)),
12884 XVECEXP (vals, 0, i));
12885 emit_move_insn (target, mem);
12888 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12889 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12890 reported source locations are bogus. */
12892 static void
12893 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12894 const char *err)
12896 HOST_WIDE_INT lane;
12898 gcc_assert (CONST_INT_P (operand));
12900 lane = INTVAL (operand);
12902 if (lane < low || lane >= high)
12903 error (err);
12906 /* Bounds-check lanes. */
12908 void
12909 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12911 bounds_check (operand, low, high, "lane out of range");
12914 /* Bounds-check constants. */
12916 void
12917 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12919 bounds_check (operand, low, high, "constant out of range");
12922 HOST_WIDE_INT
12923 neon_element_bits (machine_mode mode)
12925 if (mode == DImode)
12926 return GET_MODE_BITSIZE (mode);
12927 else
12928 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12932 /* Predicates for `match_operand' and `match_operator'. */
12934 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12935 WB is true if full writeback address modes are allowed and is false
12936 if limited writeback address modes (POST_INC and PRE_DEC) are
12937 allowed. */
12940 arm_coproc_mem_operand (rtx op, bool wb)
12942 rtx ind;
12944 /* Reject eliminable registers. */
12945 if (! (reload_in_progress || reload_completed || lra_in_progress)
12946 && ( reg_mentioned_p (frame_pointer_rtx, op)
12947 || reg_mentioned_p (arg_pointer_rtx, op)
12948 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12949 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12950 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12951 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12952 return FALSE;
12954 /* Constants are converted into offsets from labels. */
12955 if (!MEM_P (op))
12956 return FALSE;
12958 ind = XEXP (op, 0);
12960 if (reload_completed
12961 && (GET_CODE (ind) == LABEL_REF
12962 || (GET_CODE (ind) == CONST
12963 && GET_CODE (XEXP (ind, 0)) == PLUS
12964 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12965 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12966 return TRUE;
12968 /* Match: (mem (reg)). */
12969 if (REG_P (ind))
12970 return arm_address_register_rtx_p (ind, 0);
12972 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12973 acceptable in any case (subject to verification by
12974 arm_address_register_rtx_p). We need WB to be true to accept
12975 PRE_INC and POST_DEC. */
12976 if (GET_CODE (ind) == POST_INC
12977 || GET_CODE (ind) == PRE_DEC
12978 || (wb
12979 && (GET_CODE (ind) == PRE_INC
12980 || GET_CODE (ind) == POST_DEC)))
12981 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12983 if (wb
12984 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12985 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12986 && GET_CODE (XEXP (ind, 1)) == PLUS
12987 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12988 ind = XEXP (ind, 1);
12990 /* Match:
12991 (plus (reg)
12992 (const)). */
12993 if (GET_CODE (ind) == PLUS
12994 && REG_P (XEXP (ind, 0))
12995 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12996 && CONST_INT_P (XEXP (ind, 1))
12997 && INTVAL (XEXP (ind, 1)) > -1024
12998 && INTVAL (XEXP (ind, 1)) < 1024
12999 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13000 return TRUE;
13002 return FALSE;
13005 /* Return TRUE if OP is a memory operand which we can load or store a vector
13006 to/from. TYPE is one of the following values:
13007 0 - Vector load/store (vldr)
13008 1 - Core registers (ldm)
13009 2 - Element/structure loads (vld1)
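/* Illustrative examples: TYPE 0 accepts (mem (reg rN)),
   (mem (plus (reg rN) (const_int 8))) -- i.e. "vldr dD, [rN, #8]" -- and
   post-incremented forms; TYPE 2 also allows post-increment by a register
   (POST_MODIFY), as in "vld1.8 {dD}, [rN], rM", but not the constant-offset
   form; TYPE 1 accepts only a plain register base.  */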
13012 neon_vector_mem_operand (rtx op, int type, bool strict)
13014 rtx ind;
13016 /* Reject eliminable registers. */
13017 if (! (reload_in_progress || reload_completed)
13018 && ( reg_mentioned_p (frame_pointer_rtx, op)
13019 || reg_mentioned_p (arg_pointer_rtx, op)
13020 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13021 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13022 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13023 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13024 return !strict;
13026 /* Constants are converted into offsets from labels. */
13027 if (!MEM_P (op))
13028 return FALSE;
13030 ind = XEXP (op, 0);
13032 if (reload_completed
13033 && (GET_CODE (ind) == LABEL_REF
13034 || (GET_CODE (ind) == CONST
13035 && GET_CODE (XEXP (ind, 0)) == PLUS
13036 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13037 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13038 return TRUE;
13040 /* Match: (mem (reg)). */
13041 if (REG_P (ind))
13042 return arm_address_register_rtx_p (ind, 0);
13044 /* Allow post-increment with Neon registers. */
13045 if ((type != 1 && GET_CODE (ind) == POST_INC)
13046 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13047 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13049 /* Allow post-increment by register for VLDn */
13050 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13051 && GET_CODE (XEXP (ind, 1)) == PLUS
13052 && REG_P (XEXP (XEXP (ind, 1), 1)))
13053 return true;
13055 /* Match:
13056 (plus (reg)
13057 (const)). */
13058 if (type == 0
13059 && GET_CODE (ind) == PLUS
13060 && REG_P (XEXP (ind, 0))
13061 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13062 && CONST_INT_P (XEXP (ind, 1))
13063 && INTVAL (XEXP (ind, 1)) > -1024
13064 /* For quad modes, we restrict the constant offset to be slightly less
13065 than what the instruction format permits. We have no such constraint
13066 on double mode offsets. (This must match arm_legitimate_index_p.) */
13067 && (INTVAL (XEXP (ind, 1))
13068 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13069 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13070 return TRUE;
13072 return FALSE;
13075 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13076 type. */
13078 neon_struct_mem_operand (rtx op)
13080 rtx ind;
13082 /* Reject eliminable registers. */
13083 if (! (reload_in_progress || reload_completed)
13084 && ( reg_mentioned_p (frame_pointer_rtx, op)
13085 || reg_mentioned_p (arg_pointer_rtx, op)
13086 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13087 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13088 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13089 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13090 return FALSE;
13092 /* Constants are converted into offsets from labels. */
13093 if (!MEM_P (op))
13094 return FALSE;
13096 ind = XEXP (op, 0);
13098 if (reload_completed
13099 && (GET_CODE (ind) == LABEL_REF
13100 || (GET_CODE (ind) == CONST
13101 && GET_CODE (XEXP (ind, 0)) == PLUS
13102 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13103 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13104 return TRUE;
13106 /* Match: (mem (reg)). */
13107 if (REG_P (ind))
13108 return arm_address_register_rtx_p (ind, 0);
13110 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13111 if (GET_CODE (ind) == POST_INC
13112 || GET_CODE (ind) == PRE_DEC)
13113 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13115 return FALSE;
13118 /* Return true if X is a register that will be eliminated later on. */
13120 arm_eliminable_register (rtx x)
13122 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13123 || REGNO (x) == ARG_POINTER_REGNUM
13124 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13125 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13128 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13129 coprocessor registers. Otherwise return NO_REGS. */
13131 enum reg_class
13132 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13134 if (mode == HFmode)
13136 if (!TARGET_NEON_FP16)
13137 return GENERAL_REGS;
13138 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13139 return NO_REGS;
13140 return GENERAL_REGS;
13143 /* The neon move patterns handle all legitimate vector and struct
13144 addresses. */
13145 if (TARGET_NEON
13146 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13147 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13148 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13149 || VALID_NEON_STRUCT_MODE (mode)))
13150 return NO_REGS;
13152 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13153 return NO_REGS;
13155 return GENERAL_REGS;
13158 /* Values which must be returned in the most-significant end of the return
13159 register. */
13161 static bool
13162 arm_return_in_msb (const_tree valtype)
13164 return (TARGET_AAPCS_BASED
13165 && BYTES_BIG_ENDIAN
13166 && (AGGREGATE_TYPE_P (valtype)
13167 || TREE_CODE (valtype) == COMPLEX_TYPE
13168 || FIXED_POINT_TYPE_P (valtype)));
13171 /* Return TRUE if X references a SYMBOL_REF. */
13173 symbol_mentioned_p (rtx x)
13175 const char * fmt;
13176 int i;
13178 if (GET_CODE (x) == SYMBOL_REF)
13179 return 1;
13181 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13182 are constant offsets, not symbols. */
13183 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13184 return 0;
13186 fmt = GET_RTX_FORMAT (GET_CODE (x));
13188 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13190 if (fmt[i] == 'E')
13192 int j;
13194 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13195 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13196 return 1;
13198 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13199 return 1;
13202 return 0;
13205 /* Return TRUE if X references a LABEL_REF. */
13207 label_mentioned_p (rtx x)
13209 const char * fmt;
13210 int i;
13212 if (GET_CODE (x) == LABEL_REF)
13213 return 1;
13215 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13216 instruction, but they are constant offsets, not symbols. */
13217 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13218 return 0;
13220 fmt = GET_RTX_FORMAT (GET_CODE (x));
13221 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13223 if (fmt[i] == 'E')
13225 int j;
13227 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13228 if (label_mentioned_p (XVECEXP (x, i, j)))
13229 return 1;
13231 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13232 return 1;
13235 return 0;
13239 tls_mentioned_p (rtx x)
13241 switch (GET_CODE (x))
13243 case CONST:
13244 return tls_mentioned_p (XEXP (x, 0));
13246 case UNSPEC:
13247 if (XINT (x, 1) == UNSPEC_TLS)
13248 return 1;
13250 default:
13251 return 0;
13255 /* Must not copy any rtx that uses a pc-relative address. */
13257 static bool
13258 arm_cannot_copy_insn_p (rtx_insn *insn)
13260 /* The tls call insn cannot be copied, as it is paired with a data
13261 word. */
13262 if (recog_memoized (insn) == CODE_FOR_tlscall)
13263 return true;
13265 subrtx_iterator::array_type array;
13266 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13268 const_rtx x = *iter;
13269 if (GET_CODE (x) == UNSPEC
13270 && (XINT (x, 1) == UNSPEC_PIC_BASE
13271 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13272 return true;
13274 return false;
13277 enum rtx_code
13278 minmax_code (rtx x)
13280 enum rtx_code code = GET_CODE (x);
13282 switch (code)
13284 case SMAX:
13285 return GE;
13286 case SMIN:
13287 return LE;
13288 case UMIN:
13289 return LEU;
13290 case UMAX:
13291 return GEU;
13292 default:
13293 gcc_unreachable ();
13297 /* Match pair of min/max operators that can be implemented via usat/ssat. */
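/* Worked example (illustrative): a clamp to [0, 255] gives log = 8 and a zero
   low bound, so *MASK = 8 and *SIGNED_SAT = false, i.e. "usat Rd, #8, Rm".
   A clamp to [-256, 255] gives *MASK = 9 and *SIGNED_SAT = true, i.e.
   "ssat Rd, #9, Rm".  */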
13299 bool
13300 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13301 int *mask, bool *signed_sat)
13303 /* The high bound must be a power of two minus one. */
13304 int log = exact_log2 (INTVAL (hi_bound) + 1);
13305 if (log == -1)
13306 return false;
13308 /* The low bound is either zero (for usat) or one less than the
13309 negation of the high bound (for ssat). */
13310 if (INTVAL (lo_bound) == 0)
13312 if (mask)
13313 *mask = log;
13314 if (signed_sat)
13315 *signed_sat = false;
13317 return true;
13320 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13322 if (mask)
13323 *mask = log + 1;
13324 if (signed_sat)
13325 *signed_sat = true;
13327 return true;
13330 return false;
13333 /* Return 1 if memory locations are adjacent. */
13335 adjacent_mem_locations (rtx a, rtx b)
13337 /* We don't guarantee to preserve the order of these memory refs. */
13338 if (volatile_refs_p (a) || volatile_refs_p (b))
13339 return 0;
13341 if ((REG_P (XEXP (a, 0))
13342 || (GET_CODE (XEXP (a, 0)) == PLUS
13343 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13344 && (REG_P (XEXP (b, 0))
13345 || (GET_CODE (XEXP (b, 0)) == PLUS
13346 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13348 HOST_WIDE_INT val0 = 0, val1 = 0;
13349 rtx reg0, reg1;
13350 int val_diff;
13352 if (GET_CODE (XEXP (a, 0)) == PLUS)
13354 reg0 = XEXP (XEXP (a, 0), 0);
13355 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13357 else
13358 reg0 = XEXP (a, 0);
13360 if (GET_CODE (XEXP (b, 0)) == PLUS)
13362 reg1 = XEXP (XEXP (b, 0), 0);
13363 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13365 else
13366 reg1 = XEXP (b, 0);
13368 /* Don't accept any offset that will require multiple
13369 instructions to handle, since this would cause the
13370 arith_adjacentmem pattern to output an overlong sequence. */
13371 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13372 return 0;
13374 /* Don't allow an eliminable register: register elimination can make
13375 the offset too large. */
13376 if (arm_eliminable_register (reg0))
13377 return 0;
13379 val_diff = val1 - val0;
13381 if (arm_ld_sched)
13383 /* If the target has load delay slots, then there's no benefit
13384 to using an ldm instruction unless the offset is zero and
13385 we are optimizing for size. */
13386 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13387 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13388 && (val_diff == 4 || val_diff == -4));
13391 return ((REGNO (reg0) == REGNO (reg1))
13392 && (val_diff == 4 || val_diff == -4));
13395 return 0;
13398 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13399 for load operations, false for store operations. CONSECUTIVE is true
13400 if the register numbers in the operation must be consecutive in the register
13401 bank. RETURN_PC is true if the value is to be loaded into the PC.
13402 The pattern we are trying to match for load is:
13403 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13404 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13407 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13409 where
13410 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13411 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13412 3. If consecutive is TRUE, then for kth register being loaded,
13413 REGNO (R_dk) = REGNO (R_d0) + k.
13414 The pattern for store is similar. */
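/* For instance (illustrative), "ldm r0, {r1, r2, r3}" is represented as
     (parallel [(set (reg r1) (mem (reg r0)))
                (set (reg r2) (mem (plus (reg r0) (const_int 4))))
                (set (reg r3) (mem (plus (reg r0) (const_int 8))))])
   which satisfies rules 1 and 2 above, and rule 3 when CONSECUTIVE.  */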
13415 bool
13416 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13417 bool consecutive, bool return_pc)
13419 HOST_WIDE_INT count = XVECLEN (op, 0);
13420 rtx reg, mem, addr;
13421 unsigned regno;
13422 unsigned first_regno;
13423 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13424 rtx elt;
13425 bool addr_reg_in_reglist = false;
13426 bool update = false;
13427 int reg_increment;
13428 int offset_adj;
13429 int regs_per_val;
13431 /* If not in SImode, then registers must be consecutive
13432 (e.g., VLDM instructions for DFmode). */
13433 gcc_assert ((mode == SImode) || consecutive);
13434 /* Setting return_pc for stores is illegal. */
13435 gcc_assert (!return_pc || load);
13437 /* Set up the increments and the regs per val based on the mode. */
13438 reg_increment = GET_MODE_SIZE (mode);
13439 regs_per_val = reg_increment / 4;
13440 offset_adj = return_pc ? 1 : 0;
13442 if (count <= 1
13443 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13444 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13445 return false;
13447 /* Check if this is a write-back. */
13448 elt = XVECEXP (op, 0, offset_adj);
13449 if (GET_CODE (SET_SRC (elt)) == PLUS)
13451 i++;
13452 base = 1;
13453 update = true;
13455 /* The offset adjustment must be the number of registers being
13456 popped times the size of a single register. */
13457 if (!REG_P (SET_DEST (elt))
13458 || !REG_P (XEXP (SET_SRC (elt), 0))
13459 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13460 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13461 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13462 ((count - 1 - offset_adj) * reg_increment))
13463 return false;
13466 i = i + offset_adj;
13467 base = base + offset_adj;
13468 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13469 success depends on the type: VLDM can do just one reg,
13470 LDM must do at least two. */
13471 if ((count <= i) && (mode == SImode))
13472 return false;
13474 elt = XVECEXP (op, 0, i - 1);
13475 if (GET_CODE (elt) != SET)
13476 return false;
13478 if (load)
13480 reg = SET_DEST (elt);
13481 mem = SET_SRC (elt);
13483 else
13485 reg = SET_SRC (elt);
13486 mem = SET_DEST (elt);
13489 if (!REG_P (reg) || !MEM_P (mem))
13490 return false;
13492 regno = REGNO (reg);
13493 first_regno = regno;
13494 addr = XEXP (mem, 0);
13495 if (GET_CODE (addr) == PLUS)
13497 if (!CONST_INT_P (XEXP (addr, 1)))
13498 return false;
13500 offset = INTVAL (XEXP (addr, 1));
13501 addr = XEXP (addr, 0);
13504 if (!REG_P (addr))
13505 return false;
13507 /* Don't allow SP to be loaded unless it is also the base register. It
13508 guarantees that SP is reset correctly when an LDM instruction
13509 is interrupted. Otherwise, we might end up with a corrupt stack. */
13510 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13511 return false;
13513 for (; i < count; i++)
13515 elt = XVECEXP (op, 0, i);
13516 if (GET_CODE (elt) != SET)
13517 return false;
13519 if (load)
13521 reg = SET_DEST (elt);
13522 mem = SET_SRC (elt);
13524 else
13526 reg = SET_SRC (elt);
13527 mem = SET_DEST (elt);
13530 if (!REG_P (reg)
13531 || GET_MODE (reg) != mode
13532 || REGNO (reg) <= regno
13533 || (consecutive
13534 && (REGNO (reg) !=
13535 (unsigned int) (first_regno + regs_per_val * (i - base))))
13536 /* Don't allow SP to be loaded unless it is also the base register. It
13537 guarantees that SP is reset correctly when an LDM instruction
13538 is interrupted. Otherwise, we might end up with a corrupt stack. */
13539 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13540 || !MEM_P (mem)
13541 || GET_MODE (mem) != mode
13542 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13543 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13544 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13545 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13546 offset + (i - base) * reg_increment))
13547 && (!REG_P (XEXP (mem, 0))
13548 || offset + (i - base) * reg_increment != 0)))
13549 return false;
13551 regno = REGNO (reg);
13552 if (regno == REGNO (addr))
13553 addr_reg_in_reglist = true;
13556 if (load)
13558 if (update && addr_reg_in_reglist)
13559 return false;
13561 /* For Thumb-1, the address register is always modified - either by write-back
13562 or by explicit load. If the pattern does not describe an update,
13563 then the address register must be in the list of loaded registers. */
13564 if (TARGET_THUMB1)
13565 return update || addr_reg_in_reglist;
13568 return true;
13571 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13572 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13573 instruction. ADD_OFFSET is nonzero if the base address register needs
13574 to be modified with an add instruction before we can use it. */
13576 static bool
13577 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13578 int nops, HOST_WIDE_INT add_offset)
13580 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13581 if the offset isn't small enough. The reason 2 ldrs are faster
13582 is that these ARMs are able to do more than one cache access
13583 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13584 whilst the ARM8 has a double bandwidth cache. This means that
13585 these cores can do both an instruction fetch and a data fetch in
13586 a single cycle, so the trick of calculating the address into a
13587 scratch register (one of the result regs) and then doing a load
13588 multiple actually becomes slower (and no smaller in code size).
13589 That is the transformation
13591 ldr rd1, [rbase + offset]
13592 ldr rd2, [rbase + offset + 4]
13594 to
13596 add rd1, rbase, offset
13597 ldmia rd1, {rd1, rd2}
13599 produces worse code -- '3 cycles + any stalls on rd2' instead of
13600 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13601 access per cycle, the first sequence could never complete in less
13602 than 6 cycles, whereas the ldm sequence would only take 5 and
13603 would make better use of sequential accesses if not hitting the
13604 cache.
13606 We cheat here and test 'arm_ld_sched' which we currently know to
13607 only be true for the ARM8, ARM9 and StrongARM. If this ever
13608 changes, then the test below needs to be reworked. */
13609 if (nops == 2 && arm_ld_sched && add_offset != 0)
13610 return false;
13612 /* XScale has load-store double instructions, but they have stricter
13613 alignment requirements than load-store multiple, so we cannot
13614 use them.
13616 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13617 the pipeline until completion.
13619 NREGS CYCLES
13620 1 3
13621 2 4
13622 3 5
13623 4 6
13625 An ldr instruction takes 1-3 cycles, but does not block the
13626 pipeline.
13628 NREGS CYCLES
13629 1 1-3
13630 2 2-6
13631 3 3-9
13632 4 4-12
13634 Best case ldr will always win. However, the more ldr instructions
13635 we issue, the less likely we are to be able to schedule them well.
13636 Using ldr instructions also increases code size.
13638 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13639 for counts of 3 or 4 regs. */
13640 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13641 return false;
13642 return true;
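/* Illustrative arithmetic for the XScale numbers above (added for
   exposition): a 2-register ldm costs 2 + 2 = 4 cycles and blocks the
   pipeline, while two separate ldr instructions cost 1-3 cycles each and
   can overlap other work, so ldr wins; for 4 registers the ldm needs
   2 + 4 = 6 cycles against 4-12 for four ldrs, hence the NOPS <= 2 cutoff
   when tuning for XScale and not optimizing for size.  */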
13645 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13646 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13647 an array ORDER which describes the sequence to use when accessing the
13648 offsets that produces an ascending order. In this sequence, each
13649 offset must be larger by exactly 4 than the previous one. ORDER[0]
13650 must have been filled in with the lowest offset by the caller.
13651 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13652 we use to verify that ORDER produces an ascending order of registers.
13653 Return true if it was possible to construct such an order, false if
13654 not. */
13656 static bool
13657 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13658 int *unsorted_regs)
13660 int i;
13661 for (i = 1; i < nops; i++)
13663 int j;
13665 order[i] = order[i - 1];
13666 for (j = 0; j < nops; j++)
13667 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13669 /* We must find exactly one offset that is higher than the
13670 previous one by 4. */
13671 if (order[i] != order[i - 1])
13672 return false;
13673 order[i] = j;
13675 if (order[i] == order[i - 1])
13676 return false;
13677 /* The register numbers must be ascending. */
13678 if (unsorted_regs != NULL
13679 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13680 return false;
13682 return true;
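/* Worked example (added for exposition): with NOPS == 4,
   UNSORTED_OFFSETS == {4, 12, 0, 8} and ORDER[0] == 2 (the index of the
   lowest offset), the loop fills ORDER with {2, 0, 3, 1}, visiting the
   offsets as 0, 4, 8, 12.  A gap other than 4, or a duplicated offset,
   makes the function return false.  */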
13685 /* Used to determine in a peephole whether a sequence of load
13686 instructions can be changed into a load-multiple instruction.
13687 NOPS is the number of separate load instructions we are examining. The
13688 first NOPS entries in OPERANDS are the destination registers, the
13689 next NOPS entries are memory operands. If this function is
13690 successful, *BASE is set to the common base register of the memory
13691 accesses; *LOAD_OFFSET is set to the first memory location's offset
13692 from that base register.
13693 REGS is an array filled in with the destination register numbers.
13694 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13695 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13696 the sequence of registers in REGS matches the loads from ascending memory
13697 locations, and the function verifies that the register numbers are
13698 themselves ascending. If CHECK_REGS is false, the register numbers
13699 are stored in the order they are found in the operands. */
13700 static int
13701 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13702 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13704 int unsorted_regs[MAX_LDM_STM_OPS];
13705 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13706 int order[MAX_LDM_STM_OPS];
13707 rtx base_reg_rtx = NULL;
13708 int base_reg = -1;
13709 int i, ldm_case;
13711 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13712 easily extended if required. */
13713 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13715 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13717 /* Loop over the operands and check that the memory references are
13718 suitable (i.e. immediate offsets from the same base register). At
13719 the same time, extract the target register, and the memory
13720 offsets. */
13721 for (i = 0; i < nops; i++)
13723 rtx reg;
13724 rtx offset;
13726 /* Convert a subreg of a mem into the mem itself. */
13727 if (GET_CODE (operands[nops + i]) == SUBREG)
13728 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13730 gcc_assert (MEM_P (operands[nops + i]));
13732 /* Don't reorder volatile memory references; it doesn't seem worth
13733 looking for the case where the order is ok anyway. */
13734 if (MEM_VOLATILE_P (operands[nops + i]))
13735 return 0;
13737 offset = const0_rtx;
13739 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13740 || (GET_CODE (reg) == SUBREG
13741 && REG_P (reg = SUBREG_REG (reg))))
13742 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13743 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13744 || (GET_CODE (reg) == SUBREG
13745 && REG_P (reg = SUBREG_REG (reg))))
13746 && (CONST_INT_P (offset
13747 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13749 if (i == 0)
13751 base_reg = REGNO (reg);
13752 base_reg_rtx = reg;
13753 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13754 return 0;
13756 else if (base_reg != (int) REGNO (reg))
13757 /* Not addressed from the same base register. */
13758 return 0;
13760 unsorted_regs[i] = (REG_P (operands[i])
13761 ? REGNO (operands[i])
13762 : REGNO (SUBREG_REG (operands[i])));
13764 /* If it isn't an integer register, or if it overwrites the
13765 base register but isn't the last insn in the list, then
13766 we can't do this. */
13767 if (unsorted_regs[i] < 0
13768 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13769 || unsorted_regs[i] > 14
13770 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13771 return 0;
13773 /* Don't allow SP to be loaded unless it is also the base
13774 register. It guarantees that SP is reset correctly when
13775 an LDM instruction is interrupted. Otherwise, we might
13776 end up with a corrupt stack. */
13777 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13778 return 0;
13780 unsorted_offsets[i] = INTVAL (offset);
13781 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13782 order[0] = i;
13784 else
13785 /* Not a suitable memory address. */
13786 return 0;
13789 /* All the useful information has now been extracted from the
13790 operands into unsorted_regs and unsorted_offsets; additionally,
13791 order[0] has been set to the lowest offset in the list. Sort
13792 the offsets into order, verifying that they are adjacent, and
13793 check that the register numbers are ascending. */
13794 if (!compute_offset_order (nops, unsorted_offsets, order,
13795 check_regs ? unsorted_regs : NULL))
13796 return 0;
13798 if (saved_order)
13799 memcpy (saved_order, order, sizeof order);
13801 if (base)
13803 *base = base_reg;
13805 for (i = 0; i < nops; i++)
13806 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13808 *load_offset = unsorted_offsets[order[0]];
13811 if (TARGET_THUMB1
13812 && !peep2_reg_dead_p (nops, base_reg_rtx))
13813 return 0;
13815 if (unsorted_offsets[order[0]] == 0)
13816 ldm_case = 1; /* ldmia */
13817 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13818 ldm_case = 2; /* ldmib */
13819 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13820 ldm_case = 3; /* ldmda */
13821 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13822 ldm_case = 4; /* ldmdb */
13823 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13824 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13825 ldm_case = 5;
13826 else
13827 return 0;
13829 if (!multiple_operation_profitable_p (false, nops,
13830 ldm_case == 5
13831 ? unsorted_offsets[order[0]] : 0))
13832 return 0;
13834 return ldm_case;
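/* Example (added for exposition): the three loads
     r4 <- [r1], r5 <- [r1, #4], r6 <- [r1, #8]
   share base register r1, have adjacent offsets starting at 0 and
   ascending destination registers, so this returns 1 (ldmia) with
   *BASE == 1 and *LOAD_OFFSET == 0; were the lowest offset 4 instead,
   ARM mode would return 2 (ldmib).  */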
13837 /* Used to determine in a peephole whether a sequence of store instructions can
13838 be changed into a store-multiple instruction.
13839 NOPS is the number of separate store instructions we are examining.
13840 NOPS_TOTAL is the total number of instructions recognized by the peephole
13841 pattern.
13842 The first NOPS entries in OPERANDS are the source registers, the next
13843 NOPS entries are memory operands. If this function is successful, *BASE is
13844 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13845 to the first memory location's offset from that base register. REGS is an
13846 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13847 likewise filled with the corresponding rtx's.
13848 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13849 numbers to an ascending order of stores.
13850 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13851 from ascending memory locations, and the function verifies that the register
13852 numbers are themselves ascending. If CHECK_REGS is false, the register
13853 numbers are stored in the order they are found in the operands. */
13854 static int
13855 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13856 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13857 HOST_WIDE_INT *load_offset, bool check_regs)
13859 int unsorted_regs[MAX_LDM_STM_OPS];
13860 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13861 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13862 int order[MAX_LDM_STM_OPS];
13863 int base_reg = -1;
13864 rtx base_reg_rtx = NULL;
13865 int i, stm_case;
13867 /* Write back of base register is currently only supported for Thumb 1. */
13868 int base_writeback = TARGET_THUMB1;
13870 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13871 easily extended if required. */
13872 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13874 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13876 /* Loop over the operands and check that the memory references are
13877 suitable (i.e. immediate offsets from the same base register). At
13878 the same time, extract the target register, and the memory
13879 offsets. */
13880 for (i = 0; i < nops; i++)
13882 rtx reg;
13883 rtx offset;
13885 /* Convert a subreg of a mem into the mem itself. */
13886 if (GET_CODE (operands[nops + i]) == SUBREG)
13887 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13889 gcc_assert (MEM_P (operands[nops + i]));
13891 /* Don't reorder volatile memory references; it doesn't seem worth
13892 looking for the case where the order is ok anyway. */
13893 if (MEM_VOLATILE_P (operands[nops + i]))
13894 return 0;
13896 offset = const0_rtx;
13898 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13899 || (GET_CODE (reg) == SUBREG
13900 && REG_P (reg = SUBREG_REG (reg))))
13901 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13902 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13903 || (GET_CODE (reg) == SUBREG
13904 && REG_P (reg = SUBREG_REG (reg))))
13905 && (CONST_INT_P (offset
13906 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13908 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13909 ? operands[i] : SUBREG_REG (operands[i]));
13910 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13912 if (i == 0)
13914 base_reg = REGNO (reg);
13915 base_reg_rtx = reg;
13916 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13917 return 0;
13919 else if (base_reg != (int) REGNO (reg))
13920 /* Not addressed from the same base register. */
13921 return 0;
13923 /* If it isn't an integer register, then we can't do this. */
13924 if (unsorted_regs[i] < 0
13925 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13926 /* The effects are unpredictable if the base register is
13927 both updated and stored. */
13928 || (base_writeback && unsorted_regs[i] == base_reg)
13929 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13930 || unsorted_regs[i] > 14)
13931 return 0;
13933 unsorted_offsets[i] = INTVAL (offset);
13934 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13935 order[0] = i;
13937 else
13938 /* Not a suitable memory address. */
13939 return 0;
13942 /* All the useful information has now been extracted from the
13943 operands into unsorted_regs and unsorted_offsets; additionally,
13944 order[0] has been set to the lowest offset in the list. Sort
13945 the offsets into order, verifying that they are adjacent, and
13946 check that the register numbers are ascending. */
13947 if (!compute_offset_order (nops, unsorted_offsets, order,
13948 check_regs ? unsorted_regs : NULL))
13949 return 0;
13951 if (saved_order)
13952 memcpy (saved_order, order, sizeof order);
13954 if (base)
13956 *base = base_reg;
13958 for (i = 0; i < nops; i++)
13960 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13961 if (reg_rtxs)
13962 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13965 *load_offset = unsorted_offsets[order[0]];
13968 if (TARGET_THUMB1
13969 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13970 return 0;
13972 if (unsorted_offsets[order[0]] == 0)
13973 stm_case = 1; /* stmia */
13974 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13975 stm_case = 2; /* stmib */
13976 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13977 stm_case = 3; /* stmda */
13978 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13979 stm_case = 4; /* stmdb */
13980 else
13981 return 0;
13983 if (!multiple_operation_profitable_p (false, nops, 0))
13984 return 0;
13986 return stm_case;
13989 /* Routines for use in generating RTL. */
13991 /* Generate a load-multiple instruction. COUNT is the number of loads in
13992 the instruction; REGS and MEMS are arrays containing the operands.
13993 BASEREG is the base register to be used in addressing the memory operands.
13994 WBACK_OFFSET is nonzero if the instruction should update the base
13995 register. */
13997 static rtx
13998 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13999 HOST_WIDE_INT wback_offset)
14001 int i = 0, j;
14002 rtx result;
14004 if (!multiple_operation_profitable_p (false, count, 0))
14006 rtx seq;
14008 start_sequence ();
14010 for (i = 0; i < count; i++)
14011 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14013 if (wback_offset != 0)
14014 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14016 seq = get_insns ();
14017 end_sequence ();
14019 return seq;
14022 result = gen_rtx_PARALLEL (VOIDmode,
14023 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14024 if (wback_offset != 0)
14026 XVECEXP (result, 0, 0)
14027 = gen_rtx_SET (VOIDmode, basereg,
14028 plus_constant (Pmode, basereg, wback_offset));
14029 i = 1;
14030 count++;
14033 for (j = 0; i < count; i++, j++)
14034 XVECEXP (result, 0, i)
14035 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
14037 return result;
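/* Added for exposition (not part of the original source): with COUNT == 2,
   REGS == {4, 5} and WBACK_OFFSET == 8, the PARALLEL built above is

     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) MEMS[0])
                (set (reg:SI 5) MEMS[1])])

   i.e. the base-register update occupies element 0 and the loads follow,
   which is the shape the load-multiple patterns expect.  */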
14040 /* Generate a store-multiple instruction. COUNT is the number of stores in
14041 the instruction; REGS and MEMS are arrays containing the operands.
14042 BASEREG is the base register to be used in addressing the memory operands.
14043 WBACK_OFFSET is nonzero if the instruction should update the base
14044 register. */
14046 static rtx
14047 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14048 HOST_WIDE_INT wback_offset)
14050 int i = 0, j;
14051 rtx result;
14053 if (GET_CODE (basereg) == PLUS)
14054 basereg = XEXP (basereg, 0);
14056 if (!multiple_operation_profitable_p (false, count, 0))
14058 rtx seq;
14060 start_sequence ();
14062 for (i = 0; i < count; i++)
14063 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14065 if (wback_offset != 0)
14066 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14068 seq = get_insns ();
14069 end_sequence ();
14071 return seq;
14074 result = gen_rtx_PARALLEL (VOIDmode,
14075 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14076 if (wback_offset != 0)
14078 XVECEXP (result, 0, 0)
14079 = gen_rtx_SET (VOIDmode, basereg,
14080 plus_constant (Pmode, basereg, wback_offset));
14081 i = 1;
14082 count++;
14085 for (j = 0; i < count; i++, j++)
14086 XVECEXP (result, 0, i)
14087 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
14089 return result;
14092 /* Generate either a load-multiple or a store-multiple instruction. This
14093 function can be used in situations where we can start with a single MEM
14094 rtx and adjust its address upwards.
14095 COUNT is the number of operations in the instruction, not counting a
14096 possible update of the base register. REGS is an array containing the
14097 register operands.
14098 BASEREG is the base register to be used in addressing the memory operands,
14099 which are constructed from BASEMEM.
14100 WRITE_BACK specifies whether the generated instruction should include an
14101 update of the base register.
14102 OFFSETP is used to pass an offset to and from this function; this offset
14103 is not used when constructing the address (instead BASEMEM should have an
14104 appropriate offset in its address), it is used only for setting
14105 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14107 static rtx
14108 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14109 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14111 rtx mems[MAX_LDM_STM_OPS];
14112 HOST_WIDE_INT offset = *offsetp;
14113 int i;
14115 gcc_assert (count <= MAX_LDM_STM_OPS);
14117 if (GET_CODE (basereg) == PLUS)
14118 basereg = XEXP (basereg, 0);
14120 for (i = 0; i < count; i++)
14122 rtx addr = plus_constant (Pmode, basereg, i * 4);
14123 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14124 offset += 4;
14127 if (write_back)
14128 *offsetp = offset;
14130 if (is_load)
14131 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14132 write_back ? 4 * count : 0);
14133 else
14134 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14135 write_back ? 4 * count : 0);
14138 rtx
14139 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14140 rtx basemem, HOST_WIDE_INT *offsetp)
14142 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14143 offsetp);
14146 rtx
14147 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14148 rtx basemem, HOST_WIDE_INT *offsetp)
14150 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14151 offsetp);
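/* Usage sketch (added for exposition; ADDR_REG and SRCMEM are placeholder
   names for the base-address register and the source MEM):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, addr_reg, TRUE,
                                       srcmem, &off));

   loads r4-r7 from consecutive words of SRCMEM, includes a base-register
   update of 16 because WRITE_BACK is true, and leaves OFF == 16 for the
   caller's offset bookkeeping.  */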
14154 /* Called from a peephole2 expander to turn a sequence of loads into an
14155 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14156 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14157 is true if we can reorder the registers because they are used commutatively
14158 subsequently.
14159 Returns true iff we could generate a new instruction. */
14161 bool
14162 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14164 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14165 rtx mems[MAX_LDM_STM_OPS];
14166 int i, j, base_reg;
14167 rtx base_reg_rtx;
14168 HOST_WIDE_INT offset;
14169 int write_back = FALSE;
14170 int ldm_case;
14171 rtx addr;
14173 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14174 &base_reg, &offset, !sort_regs);
14176 if (ldm_case == 0)
14177 return false;
14179 if (sort_regs)
14180 for (i = 0; i < nops - 1; i++)
14181 for (j = i + 1; j < nops; j++)
14182 if (regs[i] > regs[j])
14184 int t = regs[i];
14185 regs[i] = regs[j];
14186 regs[j] = t;
14188 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14190 if (TARGET_THUMB1)
14192 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14193 gcc_assert (ldm_case == 1 || ldm_case == 5);
14194 write_back = TRUE;
14197 if (ldm_case == 5)
14199 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14200 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14201 offset = 0;
14202 if (!TARGET_THUMB1)
14204 base_reg = regs[0];
14205 base_reg_rtx = newbase;
14209 for (i = 0; i < nops; i++)
14211 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14212 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14213 SImode, addr, 0);
14215 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14216 write_back ? offset + i * 4 : 0));
14217 return true;
14220 /* Called from a peephole2 expander to turn a sequence of stores into an
14221 STM instruction. OPERANDS are the operands found by the peephole matcher;
14222 NOPS indicates how many separate stores we are trying to combine.
14223 Returns true iff we could generate a new instruction. */
14225 bool
14226 gen_stm_seq (rtx *operands, int nops)
14228 int i;
14229 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14230 rtx mems[MAX_LDM_STM_OPS];
14231 int base_reg;
14232 rtx base_reg_rtx;
14233 HOST_WIDE_INT offset;
14234 int write_back = FALSE;
14235 int stm_case;
14236 rtx addr;
14237 bool base_reg_dies;
14239 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14240 mem_order, &base_reg, &offset, true);
14242 if (stm_case == 0)
14243 return false;
14245 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14247 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14248 if (TARGET_THUMB1)
14250 gcc_assert (base_reg_dies);
14251 write_back = TRUE;
14254 if (stm_case == 5)
14256 gcc_assert (base_reg_dies);
14257 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14258 offset = 0;
14261 addr = plus_constant (Pmode, base_reg_rtx, offset);
14263 for (i = 0; i < nops; i++)
14265 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14266 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14267 SImode, addr, 0);
14269 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14270 write_back ? offset + i * 4 : 0));
14271 return true;
14274 /* Called from a peephole2 expander to turn a sequence of stores that are
14275 preceded by constant loads into an STM instruction. OPERANDS are the
14276 operands found by the peephole matcher; NOPS indicates how many
14277 separate stores we are trying to combine; there are 2 * NOPS
14278 instructions in the peephole.
14279 Returns true iff we could generate a new instruction. */
14281 bool
14282 gen_const_stm_seq (rtx *operands, int nops)
14284 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14285 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14286 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14287 rtx mems[MAX_LDM_STM_OPS];
14288 int base_reg;
14289 rtx base_reg_rtx;
14290 HOST_WIDE_INT offset;
14291 int write_back = FALSE;
14292 int stm_case;
14293 rtx addr;
14294 bool base_reg_dies;
14295 int i, j;
14296 HARD_REG_SET allocated;
14298 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14299 mem_order, &base_reg, &offset, false);
14301 if (stm_case == 0)
14302 return false;
14304 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14306 /* If the same register is used more than once, try to find a free
14307 register. */
14308 CLEAR_HARD_REG_SET (allocated);
14309 for (i = 0; i < nops; i++)
14311 for (j = i + 1; j < nops; j++)
14312 if (regs[i] == regs[j])
14314 rtx t = peep2_find_free_register (0, nops * 2,
14315 TARGET_THUMB1 ? "l" : "r",
14316 SImode, &allocated);
14317 if (t == NULL_RTX)
14318 return false;
14319 reg_rtxs[i] = t;
14320 regs[i] = REGNO (t);
14324 /* Compute an ordering that maps the register numbers to an ascending
14325 sequence. */
14326 reg_order[0] = 0;
14327 for (i = 0; i < nops; i++)
14328 if (regs[i] < regs[reg_order[0]])
14329 reg_order[0] = i;
14331 for (i = 1; i < nops; i++)
14333 int this_order = reg_order[i - 1];
14334 for (j = 0; j < nops; j++)
14335 if (regs[j] > regs[reg_order[i - 1]]
14336 && (this_order == reg_order[i - 1]
14337 || regs[j] < regs[this_order]))
14338 this_order = j;
14339 reg_order[i] = this_order;
14342 /* Ensure that registers that must be live after the instruction end
14343 up with the correct value. */
14344 for (i = 0; i < nops; i++)
14346 int this_order = reg_order[i];
14347 if ((this_order != mem_order[i]
14348 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14349 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14350 return false;
14353 /* Load the constants. */
14354 for (i = 0; i < nops; i++)
14356 rtx op = operands[2 * nops + mem_order[i]];
14357 sorted_regs[i] = regs[reg_order[i]];
14358 emit_move_insn (reg_rtxs[reg_order[i]], op);
14361 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14363 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14364 if (TARGET_THUMB1)
14366 gcc_assert (base_reg_dies);
14367 write_back = TRUE;
14370 if (stm_case == 5)
14372 gcc_assert (base_reg_dies);
14373 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14374 offset = 0;
14377 addr = plus_constant (Pmode, base_reg_rtx, offset);
14379 for (i = 0; i < nops; i++)
14381 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14382 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14383 SImode, addr, 0);
14385 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14386 write_back ? offset + i * 4 : 0));
14387 return true;
14390 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14391 unaligned copies on processors which support unaligned semantics for those
14392 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14393 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14394 An interleave factor of 1 (the minimum) will perform no interleaving.
14395 Load/store multiple are used for aligned addresses where possible. */
14397 static void
14398 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14399 HOST_WIDE_INT length,
14400 unsigned int interleave_factor)
14402 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14403 int *regnos = XALLOCAVEC (int, interleave_factor);
14404 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14405 HOST_WIDE_INT i, j;
14406 HOST_WIDE_INT remaining = length, words;
14407 rtx halfword_tmp = NULL, byte_tmp = NULL;
14408 rtx dst, src;
14409 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14410 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14411 HOST_WIDE_INT srcoffset, dstoffset;
14412 HOST_WIDE_INT src_autoinc, dst_autoinc;
14413 rtx mem, addr;
14415 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14417 /* Use hard registers if we have aligned source or destination so we can use
14418 load/store multiple with contiguous registers. */
14419 if (dst_aligned || src_aligned)
14420 for (i = 0; i < interleave_factor; i++)
14421 regs[i] = gen_rtx_REG (SImode, i);
14422 else
14423 for (i = 0; i < interleave_factor; i++)
14424 regs[i] = gen_reg_rtx (SImode);
14426 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14427 src = copy_addr_to_reg (XEXP (srcbase, 0));
14429 srcoffset = dstoffset = 0;
14431 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14432 For copying the last bytes we want to subtract this offset again. */
14433 src_autoinc = dst_autoinc = 0;
14435 for (i = 0; i < interleave_factor; i++)
14436 regnos[i] = i;
14438 /* Copy BLOCK_SIZE_BYTES chunks. */
14440 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14442 /* Load words. */
14443 if (src_aligned && interleave_factor > 1)
14445 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14446 TRUE, srcbase, &srcoffset));
14447 src_autoinc += UNITS_PER_WORD * interleave_factor;
14449 else
14451 for (j = 0; j < interleave_factor; j++)
14453 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14454 - src_autoinc));
14455 mem = adjust_automodify_address (srcbase, SImode, addr,
14456 srcoffset + j * UNITS_PER_WORD);
14457 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14459 srcoffset += block_size_bytes;
14462 /* Store words. */
14463 if (dst_aligned && interleave_factor > 1)
14465 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14466 TRUE, dstbase, &dstoffset));
14467 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14469 else
14471 for (j = 0; j < interleave_factor; j++)
14473 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14474 - dst_autoinc));
14475 mem = adjust_automodify_address (dstbase, SImode, addr,
14476 dstoffset + j * UNITS_PER_WORD);
14477 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14479 dstoffset += block_size_bytes;
14482 remaining -= block_size_bytes;
14485 /* Copy any whole words left (note these aren't interleaved with any
14486 subsequent halfword/byte load/stores in the interests of simplicity). */
14488 words = remaining / UNITS_PER_WORD;
14490 gcc_assert (words < interleave_factor);
14492 if (src_aligned && words > 1)
14494 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14495 &srcoffset));
14496 src_autoinc += UNITS_PER_WORD * words;
14498 else
14500 for (j = 0; j < words; j++)
14502 addr = plus_constant (Pmode, src,
14503 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14504 mem = adjust_automodify_address (srcbase, SImode, addr,
14505 srcoffset + j * UNITS_PER_WORD);
14506 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14508 srcoffset += words * UNITS_PER_WORD;
14511 if (dst_aligned && words > 1)
14513 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14514 &dstoffset));
14515 dst_autoinc += words * UNITS_PER_WORD;
14517 else
14519 for (j = 0; j < words; j++)
14521 addr = plus_constant (Pmode, dst,
14522 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14523 mem = adjust_automodify_address (dstbase, SImode, addr,
14524 dstoffset + j * UNITS_PER_WORD);
14525 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14527 dstoffset += words * UNITS_PER_WORD;
14530 remaining -= words * UNITS_PER_WORD;
14532 gcc_assert (remaining < 4);
14534 /* Copy a halfword if necessary. */
14536 if (remaining >= 2)
14538 halfword_tmp = gen_reg_rtx (SImode);
14540 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14541 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14542 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14544 /* Either write out immediately, or delay until we've loaded the last
14545 byte, depending on interleave factor. */
14546 if (interleave_factor == 1)
14548 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14549 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14550 emit_insn (gen_unaligned_storehi (mem,
14551 gen_lowpart (HImode, halfword_tmp)));
14552 halfword_tmp = NULL;
14553 dstoffset += 2;
14556 remaining -= 2;
14557 srcoffset += 2;
14560 gcc_assert (remaining < 2);
14562 /* Copy last byte. */
14564 if ((remaining & 1) != 0)
14566 byte_tmp = gen_reg_rtx (SImode);
14568 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14569 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14570 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14572 if (interleave_factor == 1)
14574 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14575 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14576 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14577 byte_tmp = NULL;
14578 dstoffset++;
14581 remaining--;
14582 srcoffset++;
14585 /* Store last halfword if we haven't done so already. */
14587 if (halfword_tmp)
14589 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14590 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14591 emit_insn (gen_unaligned_storehi (mem,
14592 gen_lowpart (HImode, halfword_tmp)));
14593 dstoffset += 2;
14596 /* Likewise for last byte. */
14598 if (byte_tmp)
14600 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14601 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14602 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14603 dstoffset++;
14606 gcc_assert (remaining == 0 && srcoffset == dstoffset);
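/* Worked example (added for exposition): copying LENGTH == 11 bytes with
   INTERLEAVE_FACTOR == 1 performs two 4-byte word copies in the main loop
   (block_size_bytes == 4), leaves no whole words, then copies one halfword
   and one byte for the 3 trailing bytes, ending with remaining == 0 and
   srcoffset == dstoffset == 11.  */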
14609 /* From mips_adjust_block_mem:
14611 Helper function for doing a loop-based block operation on memory
14612 reference MEM. Each iteration of the loop will operate on LENGTH
14613 bytes of MEM.
14615 Create a new base register for use within the loop and point it to
14616 the start of MEM. Create a new memory reference that uses this
14617 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14619 static void
14620 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14621 rtx *loop_mem)
14623 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14625 /* Although the new mem does not refer to a known location,
14626 it does keep up to LENGTH bytes of alignment. */
14627 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14628 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14631 /* From mips_block_move_loop:
14633 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14634 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14635 the memory regions do not overlap. */
14637 static void
14638 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14639 unsigned int interleave_factor,
14640 HOST_WIDE_INT bytes_per_iter)
14642 rtx src_reg, dest_reg, final_src, test;
14643 HOST_WIDE_INT leftover;
14645 leftover = length % bytes_per_iter;
14646 length -= leftover;
14648 /* Create registers and memory references for use within the loop. */
14649 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14650 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14652 /* Calculate the value that SRC_REG should have after the last iteration of
14653 the loop. */
14654 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14655 0, 0, OPTAB_WIDEN);
14657 /* Emit the start of the loop. */
14658 rtx_code_label *label = gen_label_rtx ();
14659 emit_label (label);
14661 /* Emit the loop body. */
14662 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14663 interleave_factor);
14665 /* Move on to the next block. */
14666 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14667 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14669 /* Emit the loop condition. */
14670 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14671 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14673 /* Mop up any left-over bytes. */
14674 if (leftover)
14675 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14678 /* Emit a block move when either the source or destination is unaligned (not
14679 aligned to a four-byte boundary). This may need further tuning depending on
14680 core type, optimize_size setting, etc. */
14682 static int
14683 arm_movmemqi_unaligned (rtx *operands)
14685 HOST_WIDE_INT length = INTVAL (operands[2]);
14687 if (optimize_size)
14689 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14690 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14691 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14692 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14693 or dst_aligned though: allow more interleaving in those cases since the
14694 resulting code can be smaller. */
14695 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14696 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14698 if (length > 12)
14699 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14700 interleave_factor, bytes_per_iter);
14701 else
14702 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14703 interleave_factor);
14705 else
14707 /* Note that the loop created by arm_block_move_unaligned_loop may be
14708 subject to loop unrolling, which makes tuning this condition a little
14709 redundant. */
14710 if (length > 32)
14711 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14712 else
14713 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14716 return 1;
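/* Illustrative numbers (added for exposition): when not optimizing for
   size, a 40-byte unaligned copy takes the loop path (40 > 32) with an
   interleave factor of 4 and 16 bytes per iteration; the loop covers 32
   bytes and the straight-line copy mops up the remaining 8.  At -Os with
   neither operand word-aligned, the same copy instead uses a
   4-bytes-per-iteration loop to keep the inline expansion small.  */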
14719 int
14720 arm_gen_movmemqi (rtx *operands)
14722 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14723 HOST_WIDE_INT srcoffset, dstoffset;
14724 int i;
14725 rtx src, dst, srcbase, dstbase;
14726 rtx part_bytes_reg = NULL;
14727 rtx mem;
14729 if (!CONST_INT_P (operands[2])
14730 || !CONST_INT_P (operands[3])
14731 || INTVAL (operands[2]) > 64)
14732 return 0;
14734 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14735 return arm_movmemqi_unaligned (operands);
14737 if (INTVAL (operands[3]) & 3)
14738 return 0;
14740 dstbase = operands[0];
14741 srcbase = operands[1];
14743 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14744 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14746 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14747 out_words_to_go = INTVAL (operands[2]) / 4;
14748 last_bytes = INTVAL (operands[2]) & 3;
14749 dstoffset = srcoffset = 0;
14751 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14752 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14754 for (i = 0; in_words_to_go >= 2; i+=4)
14756 if (in_words_to_go > 4)
14757 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14758 TRUE, srcbase, &srcoffset));
14759 else
14760 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14761 src, FALSE, srcbase,
14762 &srcoffset));
14764 if (out_words_to_go)
14766 if (out_words_to_go > 4)
14767 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14768 TRUE, dstbase, &dstoffset));
14769 else if (out_words_to_go != 1)
14770 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14771 out_words_to_go, dst,
14772 (last_bytes == 0
14773 ? FALSE : TRUE),
14774 dstbase, &dstoffset));
14775 else
14777 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14778 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14779 if (last_bytes != 0)
14781 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14782 dstoffset += 4;
14787 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14788 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14791 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14792 if (out_words_to_go)
14794 rtx sreg;
14796 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14797 sreg = copy_to_reg (mem);
14799 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14800 emit_move_insn (mem, sreg);
14801 in_words_to_go--;
14803 gcc_assert (!in_words_to_go); /* Sanity check */
14806 if (in_words_to_go)
14808 gcc_assert (in_words_to_go > 0);
14810 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14811 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14814 gcc_assert (!last_bytes || part_bytes_reg);
14816 if (BYTES_BIG_ENDIAN && last_bytes)
14818 rtx tmp = gen_reg_rtx (SImode);
14820 /* The bytes we want are in the top end of the word. */
14821 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14822 GEN_INT (8 * (4 - last_bytes))));
14823 part_bytes_reg = tmp;
14825 while (last_bytes)
14827 mem = adjust_automodify_address (dstbase, QImode,
14828 plus_constant (Pmode, dst,
14829 last_bytes - 1),
14830 dstoffset + last_bytes - 1);
14831 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14833 if (--last_bytes)
14835 tmp = gen_reg_rtx (SImode);
14836 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14837 part_bytes_reg = tmp;
14842 else
14844 if (last_bytes > 1)
14846 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14847 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14848 last_bytes -= 2;
14849 if (last_bytes)
14851 rtx tmp = gen_reg_rtx (SImode);
14852 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14853 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14854 part_bytes_reg = tmp;
14855 dstoffset += 2;
14859 if (last_bytes)
14861 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14862 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14866 return 1;
14869 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14870 by mode size. */
14871 inline static rtx
14872 next_consecutive_mem (rtx mem)
14874 machine_mode mode = GET_MODE (mem);
14875 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14876 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14878 return adjust_automodify_address (mem, mode, addr, offset);
14881 /* Copy using LDRD/STRD instructions whenever possible.
14882 Returns true upon success. */
14883 bool
14884 gen_movmem_ldrd_strd (rtx *operands)
14886 unsigned HOST_WIDE_INT len;
14887 HOST_WIDE_INT align;
14888 rtx src, dst, base;
14889 rtx reg0;
14890 bool src_aligned, dst_aligned;
14891 bool src_volatile, dst_volatile;
14893 gcc_assert (CONST_INT_P (operands[2]));
14894 gcc_assert (CONST_INT_P (operands[3]));
14896 len = UINTVAL (operands[2]);
14897 if (len > 64)
14898 return false;
14900 /* Maximum alignment we can assume for both src and dst buffers. */
14901 align = INTVAL (operands[3]);
14903 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14904 return false;
14906 /* Place src and dst addresses in registers
14907 and update the corresponding mem rtx. */
14908 dst = operands[0];
14909 dst_volatile = MEM_VOLATILE_P (dst);
14910 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14911 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14912 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14914 src = operands[1];
14915 src_volatile = MEM_VOLATILE_P (src);
14916 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14917 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14918 src = adjust_automodify_address (src, VOIDmode, base, 0);
14920 if (!unaligned_access && !(src_aligned && dst_aligned))
14921 return false;
14923 if (src_volatile || dst_volatile)
14924 return false;
14926 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14927 if (!(dst_aligned || src_aligned))
14928 return arm_gen_movmemqi (operands);
14930 src = adjust_address (src, DImode, 0);
14931 dst = adjust_address (dst, DImode, 0);
14932 while (len >= 8)
14934 len -= 8;
14935 reg0 = gen_reg_rtx (DImode);
14936 if (src_aligned)
14937 emit_move_insn (reg0, src);
14938 else
14939 emit_insn (gen_unaligned_loaddi (reg0, src));
14941 if (dst_aligned)
14942 emit_move_insn (dst, reg0);
14943 else
14944 emit_insn (gen_unaligned_storedi (dst, reg0));
14946 src = next_consecutive_mem (src);
14947 dst = next_consecutive_mem (dst);
14950 gcc_assert (len < 8);
14951 if (len >= 4)
14953 /* More than a word but less than a double-word to copy. Copy a word. */
14954 reg0 = gen_reg_rtx (SImode);
14955 src = adjust_address (src, SImode, 0);
14956 dst = adjust_address (dst, SImode, 0);
14957 if (src_aligned)
14958 emit_move_insn (reg0, src);
14959 else
14960 emit_insn (gen_unaligned_loadsi (reg0, src));
14962 if (dst_aligned)
14963 emit_move_insn (dst, reg0);
14964 else
14965 emit_insn (gen_unaligned_storesi (dst, reg0));
14967 src = next_consecutive_mem (src);
14968 dst = next_consecutive_mem (dst);
14969 len -= 4;
14972 if (len == 0)
14973 return true;
14975 /* Copy the remaining bytes. */
14976 if (len >= 2)
14978 dst = adjust_address (dst, HImode, 0);
14979 src = adjust_address (src, HImode, 0);
14980 reg0 = gen_reg_rtx (SImode);
14981 if (src_aligned)
14982 emit_insn (gen_zero_extendhisi2 (reg0, src));
14983 else
14984 emit_insn (gen_unaligned_loadhiu (reg0, src));
14986 if (dst_aligned)
14987 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14988 else
14989 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14991 src = next_consecutive_mem (src);
14992 dst = next_consecutive_mem (dst);
14993 if (len == 2)
14994 return true;
14997 dst = adjust_address (dst, QImode, 0);
14998 src = adjust_address (src, QImode, 0);
14999 reg0 = gen_reg_rtx (QImode);
15000 emit_move_insn (reg0, src);
15001 emit_move_insn (dst, reg0);
15002 return true;
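/* Worked example (added for exposition): a 14-byte copy with both sides
   word-aligned expands to one DImode move (8 bytes, normally an LDRD/STRD
   pair), one SImode move (4 bytes) and one halfword move (2 bytes); the
   final byte-copy tail is only reached for odd lengths.  */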
15005 /* Select a dominance comparison mode if possible for a test of the general
15006 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15007 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15008 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15009 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15010 In all cases OP will be either EQ or NE, but we don't need to know which
15011 here. If we are unable to support a dominance comparison we return
15012 CC mode. This will then fail to match for the RTL expressions that
15013 generate this call. */
15014 machine_mode
15015 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15017 enum rtx_code cond1, cond2;
15018 int swapped = 0;
15020 /* Currently we will probably get the wrong result if the individual
15021 comparisons are not simple. This also ensures that it is safe to
15022 reverse a comparison if necessary. */
15023 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15024 != CCmode)
15025 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15026 != CCmode))
15027 return CCmode;
15029 /* The if_then_else variant of this tests the second condition if the
15030 first passes, but is true if the first fails. Reverse the first
15031 condition to get a true "inclusive-or" expression. */
15032 if (cond_or == DOM_CC_NX_OR_Y)
15033 cond1 = reverse_condition (cond1);
15035 /* If the comparisons are not equal, and one doesn't dominate the other,
15036 then we can't do this. */
15037 if (cond1 != cond2
15038 && !comparison_dominates_p (cond1, cond2)
15039 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15040 return CCmode;
15042 if (swapped)
15043 std::swap (cond1, cond2);
15045 switch (cond1)
15047 case EQ:
15048 if (cond_or == DOM_CC_X_AND_Y)
15049 return CC_DEQmode;
15051 switch (cond2)
15053 case EQ: return CC_DEQmode;
15054 case LE: return CC_DLEmode;
15055 case LEU: return CC_DLEUmode;
15056 case GE: return CC_DGEmode;
15057 case GEU: return CC_DGEUmode;
15058 default: gcc_unreachable ();
15061 case LT:
15062 if (cond_or == DOM_CC_X_AND_Y)
15063 return CC_DLTmode;
15065 switch (cond2)
15067 case LT:
15068 return CC_DLTmode;
15069 case LE:
15070 return CC_DLEmode;
15071 case NE:
15072 return CC_DNEmode;
15073 default:
15074 gcc_unreachable ();
15077 case GT:
15078 if (cond_or == DOM_CC_X_AND_Y)
15079 return CC_DGTmode;
15081 switch (cond2)
15083 case GT:
15084 return CC_DGTmode;
15085 case GE:
15086 return CC_DGEmode;
15087 case NE:
15088 return CC_DNEmode;
15089 default:
15090 gcc_unreachable ();
15093 case LTU:
15094 if (cond_or == DOM_CC_X_AND_Y)
15095 return CC_DLTUmode;
15097 switch (cond2)
15099 case LTU:
15100 return CC_DLTUmode;
15101 case LEU:
15102 return CC_DLEUmode;
15103 case NE:
15104 return CC_DNEmode;
15105 default:
15106 gcc_unreachable ();
15109 case GTU:
15110 if (cond_or == DOM_CC_X_AND_Y)
15111 return CC_DGTUmode;
15113 switch (cond2)
15115 case GTU:
15116 return CC_DGTUmode;
15117 case GEU:
15118 return CC_DGEUmode;
15119 case NE:
15120 return CC_DNEmode;
15121 default:
15122 gcc_unreachable ();
15125 /* The remaining cases only occur when both comparisons are the
15126 same. */
15127 case NE:
15128 gcc_assert (cond1 == cond2);
15129 return CC_DNEmode;
15131 case LE:
15132 gcc_assert (cond1 == cond2);
15133 return CC_DLEmode;
15135 case GE:
15136 gcc_assert (cond1 == cond2);
15137 return CC_DGEmode;
15139 case LEU:
15140 gcc_assert (cond1 == cond2);
15141 return CC_DLEUmode;
15143 case GEU:
15144 gcc_assert (cond1 == cond2);
15145 return CC_DGEUmode;
15147 default:
15148 gcc_unreachable ();
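/* Example (added for exposition): for (and (eq r0 0) (ge r1 0)) compared
   against zero, COND_OR is DOM_CC_X_AND_Y, COND1 is EQ and CC_DEQmode is
   returned immediately; for the DOM_CC_X_OR_Y form the pair (EQ, GE)
   selects CC_DGEmode instead, since EQ dominates GE.  */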
15152 machine_mode
15153 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15155 /* All floating point compares return CCFP if it is an equality
15156 comparison, and CCFPE otherwise. */
15157 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15159 switch (op)
15161 case EQ:
15162 case NE:
15163 case UNORDERED:
15164 case ORDERED:
15165 case UNLT:
15166 case UNLE:
15167 case UNGT:
15168 case UNGE:
15169 case UNEQ:
15170 case LTGT:
15171 return CCFPmode;
15173 case LT:
15174 case LE:
15175 case GT:
15176 case GE:
15177 return CCFPEmode;
15179 default:
15180 gcc_unreachable ();
15184 /* A compare with a shifted operand. Because of canonicalization, the
15185 comparison will have to be swapped when we emit the assembler. */
15186 if (GET_MODE (y) == SImode
15187 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15188 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15189 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15190 || GET_CODE (x) == ROTATERT))
15191 return CC_SWPmode;
15193 /* This operation is performed swapped, but since we only rely on the Z
15194 flag we don't need an additional mode. */
15195 if (GET_MODE (y) == SImode
15196 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15197 && GET_CODE (x) == NEG
15198 && (op == EQ || op == NE))
15199 return CC_Zmode;
15201 /* This is a special case that is used by combine to allow a
15202 comparison of a shifted byte load to be split into a zero-extend
15203 followed by a comparison of the shifted integer (only valid for
15204 equalities and unsigned inequalities). */
15205 if (GET_MODE (x) == SImode
15206 && GET_CODE (x) == ASHIFT
15207 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15208 && GET_CODE (XEXP (x, 0)) == SUBREG
15209 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15210 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15211 && (op == EQ || op == NE
15212 || op == GEU || op == GTU || op == LTU || op == LEU)
15213 && CONST_INT_P (y))
15214 return CC_Zmode;
15216 /* A construct for a conditional compare, if the false arm contains
15217 0, then both conditions must be true, otherwise either condition
15218 must be true. Not all conditions are possible, so CCmode is
15219 returned if it can't be done. */
15220 if (GET_CODE (x) == IF_THEN_ELSE
15221 && (XEXP (x, 2) == const0_rtx
15222 || XEXP (x, 2) == const1_rtx)
15223 && COMPARISON_P (XEXP (x, 0))
15224 && COMPARISON_P (XEXP (x, 1)))
15225 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15226 INTVAL (XEXP (x, 2)));
15228 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15229 if (GET_CODE (x) == AND
15230 && (op == EQ || op == NE)
15231 && COMPARISON_P (XEXP (x, 0))
15232 && COMPARISON_P (XEXP (x, 1)))
15233 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15234 DOM_CC_X_AND_Y);
15236 if (GET_CODE (x) == IOR
15237 && (op == EQ || op == NE)
15238 && COMPARISON_P (XEXP (x, 0))
15239 && COMPARISON_P (XEXP (x, 1)))
15240 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15241 DOM_CC_X_OR_Y);
15243 /* An operation (on Thumb) where we want to test for a single bit.
15244 This is done by shifting that bit up into the top bit of a
15245 scratch register; we can then branch on the sign bit. */
15246 if (TARGET_THUMB1
15247 && GET_MODE (x) == SImode
15248 && (op == EQ || op == NE)
15249 && GET_CODE (x) == ZERO_EXTRACT
15250 && XEXP (x, 1) == const1_rtx)
15251 return CC_Nmode;
15253 /* An operation that sets the condition codes as a side-effect, the
15254 V flag is not set correctly, so we can only use comparisons where
15255 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15256 instead.) */
15257 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15258 if (GET_MODE (x) == SImode
15259 && y == const0_rtx
15260 && (op == EQ || op == NE || op == LT || op == GE)
15261 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15262 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15263 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15264 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15265 || GET_CODE (x) == LSHIFTRT
15266 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15267 || GET_CODE (x) == ROTATERT
15268 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15269 return CC_NOOVmode;
15271 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15272 return CC_Zmode;
15274 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15275 && GET_CODE (x) == PLUS
15276 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15277 return CC_Cmode;
15279 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15281 switch (op)
15283 case EQ:
15284 case NE:
15285 /* A DImode comparison against zero can be implemented by
15286 or'ing the two halves together. */
15287 if (y == const0_rtx)
15288 return CC_Zmode;
15290 /* We can do an equality test in three Thumb instructions. */
15291 if (!TARGET_32BIT)
15292 return CC_Zmode;
15294 /* FALLTHROUGH */
15296 case LTU:
15297 case LEU:
15298 case GTU:
15299 case GEU:
15300 /* DImode unsigned comparisons can be implemented by cmp +
15301 cmpeq without a scratch register. Not worth doing in
15302 Thumb-2. */
15303 if (TARGET_32BIT)
15304 return CC_CZmode;
15306 /* FALLTHROUGH */
15308 case LT:
15309 case LE:
15310 case GT:
15311 case GE:
15312 /* DImode signed and unsigned comparisons can be implemented
15313 by cmp + sbcs with a scratch register, but that does not
15314 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15315 gcc_assert (op != EQ && op != NE);
15316 return CC_NCVmode;
15318 default:
15319 gcc_unreachable ();
15323 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15324 return GET_MODE (x);
15326 return CCmode;
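/* Example (added for exposition): a comparison such as
   (ltu (plus:SI r0 r1) r1) hits the CC_Cmode case above, because testing
   whether an unsigned addition wrapped around only needs the carry flag;
   a plain register-register compare falls through and returns CCmode.  */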
15329 /* X and Y are two things to compare using CODE. Emit the compare insn and
15330 return the rtx for register 0 in the proper mode. FP means this is a
15331 floating point compare: I don't think that it is needed on the arm. */
15332 rtx
15333 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15335 machine_mode mode;
15336 rtx cc_reg;
15337 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15339 /* We might have X as a constant, Y as a register because of the predicates
15340 used for cmpdi. If so, force X to a register here. */
15341 if (dimode_comparison && !REG_P (x))
15342 x = force_reg (DImode, x);
15344 mode = SELECT_CC_MODE (code, x, y);
15345 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15347 if (dimode_comparison
15348 && mode != CC_CZmode)
15350 rtx clobber, set;
15352 /* To compare two non-zero values for equality, XOR them and
15353 then compare against zero. Not used for ARM mode; there
15354 CC_CZmode is cheaper. */
15355 if (mode == CC_Zmode && y != const0_rtx)
15357 gcc_assert (!reload_completed);
15358 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15359 y = const0_rtx;
15362 /* A scratch register is required. */
15363 if (reload_completed)
15364 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15365 else
15366 scratch = gen_rtx_SCRATCH (SImode);
15368 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15369 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15370 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15372 else
15373 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15375 return cc_reg;
15378 /* Generate a sequence of insns that will generate the correct return
15379 address mask depending on the physical architecture that the program
15380 is running on. */
15381 rtx
15382 arm_gen_return_addr_mask (void)
15384 rtx reg = gen_reg_rtx (Pmode);
15386 emit_insn (gen_return_addr_mask (reg));
15387 return reg;
15390 void
15391 arm_reload_in_hi (rtx *operands)
15393 rtx ref = operands[1];
15394 rtx base, scratch;
15395 HOST_WIDE_INT offset = 0;
15397 if (GET_CODE (ref) == SUBREG)
15399 offset = SUBREG_BYTE (ref);
15400 ref = SUBREG_REG (ref);
15403 if (REG_P (ref))
15405 /* We have a pseudo which has been spilt onto the stack; there
15406 are two cases here: the first where there is a simple
15407 stack-slot replacement and a second where the stack-slot is
15408 out of range, or is used as a subreg. */
15409 if (reg_equiv_mem (REGNO (ref)))
15411 ref = reg_equiv_mem (REGNO (ref));
15412 base = find_replacement (&XEXP (ref, 0));
15414 else
15415 /* The slot is out of range, or was dressed up in a SUBREG. */
15416 base = reg_equiv_address (REGNO (ref));
15418 else
15419 base = find_replacement (&XEXP (ref, 0));
15421 /* Handle the case where the address is too complex to be offset by 1. */
15422 if (GET_CODE (base) == MINUS
15423 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15425 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15427 emit_set_insn (base_plus, base);
15428 base = base_plus;
15430 else if (GET_CODE (base) == PLUS)
15432 /* The addend must be CONST_INT, or we would have dealt with it above. */
15433 HOST_WIDE_INT hi, lo;
15435 offset += INTVAL (XEXP (base, 1));
15436 base = XEXP (base, 0);
15438 /* Rework the address into a legal sequence of insns. */
15439 /* Valid range for lo is -4095 -> 4095 */
15440 lo = (offset >= 0
15441 ? (offset & 0xfff)
15442 : -((-offset) & 0xfff));
15444 /* Corner case, if lo is the max offset then we would be out of range
15445 once we have added the additional 1 below, so bump the msb into the
15446 pre-loading insn(s). */
15447 if (lo == 4095)
15448 lo &= 0x7ff;
15450 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15451 ^ (HOST_WIDE_INT) 0x80000000)
15452 - (HOST_WIDE_INT) 0x80000000);
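/* Worked example: offset 0x1234 gives lo = 0x234, hi = 0x1000; the
   corner case offset 4095 gives lo = 0x7ff, hi = 0x800, so that the
   lo + 1 used below is still a valid 12-bit offset.  */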
15454 gcc_assert (hi + lo == offset);
15456 if (hi != 0)
15458 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15460 /* Get the base address; addsi3 knows how to handle constants
15461 that require more than one insn. */
15462 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15463 base = base_plus;
15464 offset = lo;
15468 /* Operands[2] may overlap operands[0] (though it won't overlap
15469 operands[1]); that is why we asked for a DImode reg -- so we can
15470 use the half that does not overlap. */
15471 if (REGNO (operands[2]) == REGNO (operands[0]))
15472 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15473 else
15474 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15476 emit_insn (gen_zero_extendqisi2 (scratch,
15477 gen_rtx_MEM (QImode,
15478 plus_constant (Pmode, base,
15479 offset))));
15480 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15481 gen_rtx_MEM (QImode,
15482 plus_constant (Pmode, base,
15483 offset + 1))));
15484 if (!BYTES_BIG_ENDIAN)
15485 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15486 gen_rtx_IOR (SImode,
15487 gen_rtx_ASHIFT
15488 (SImode,
15489 gen_rtx_SUBREG (SImode, operands[0], 0),
15490 GEN_INT (8)),
15491 scratch));
15492 else
15493 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15494 gen_rtx_IOR (SImode,
15495 gen_rtx_ASHIFT (SImode, scratch,
15496 GEN_INT (8)),
15497 gen_rtx_SUBREG (SImode, operands[0], 0)));
15500 /* Handle storing a half-word to memory during reload by synthesizing as two
15501 byte stores. Take care not to clobber the input values until after we
15502 have moved them somewhere safe. This code assumes that if the DImode
15503 scratch in operands[2] overlaps either the input value or output address
15504 in some way, then that value must die in this insn (we absolutely need
15505 two scratch registers for some corner cases). */
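/* On a little-endian target the sequence emitted below is essentially:
      strb  outval,  [base, #offset]        @ low byte
      lsr   scratch, outval, #8
      strb  scratch, [base, #offset + 1]    @ high byte
   with the two byte addresses swapped for big-endian.  */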
15506 void
15507 arm_reload_out_hi (rtx *operands)
15509 rtx ref = operands[0];
15510 rtx outval = operands[1];
15511 rtx base, scratch;
15512 HOST_WIDE_INT offset = 0;
15514 if (GET_CODE (ref) == SUBREG)
15516 offset = SUBREG_BYTE (ref);
15517 ref = SUBREG_REG (ref);
15520 if (REG_P (ref))
15522 /* We have a pseudo which has been spilt onto the stack; there
15523 are two cases here: the first where there is a simple
15524 stack-slot replacement and a second where the stack-slot is
15525 out of range, or is used as a subreg. */
15526 if (reg_equiv_mem (REGNO (ref)))
15528 ref = reg_equiv_mem (REGNO (ref));
15529 base = find_replacement (&XEXP (ref, 0));
15531 else
15532 /* The slot is out of range, or was dressed up in a SUBREG. */
15533 base = reg_equiv_address (REGNO (ref));
15535 else
15536 base = find_replacement (&XEXP (ref, 0));
15538 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15540 /* Handle the case where the address is too complex to be offset by 1. */
15541 if (GET_CODE (base) == MINUS
15542 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15544 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15546 /* Be careful not to destroy OUTVAL. */
15547 if (reg_overlap_mentioned_p (base_plus, outval))
15549 /* Updating base_plus might destroy outval, see if we can
15550 swap the scratch and base_plus. */
15551 if (!reg_overlap_mentioned_p (scratch, outval))
15552 std::swap (scratch, base_plus);
15553 else
15555 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15557 /* Be conservative and copy OUTVAL into the scratch now,
15558 this should only be necessary if outval is a subreg
15559 of something larger than a word. */
15560 /* XXX Might this clobber base? I can't see how it can,
15561 since scratch is known to overlap with OUTVAL, and
15562 must be wider than a word. */
15563 emit_insn (gen_movhi (scratch_hi, outval));
15564 outval = scratch_hi;
15568 emit_set_insn (base_plus, base);
15569 base = base_plus;
15571 else if (GET_CODE (base) == PLUS)
15573 /* The addend must be CONST_INT, or we would have dealt with it above. */
15574 HOST_WIDE_INT hi, lo;
15576 offset += INTVAL (XEXP (base, 1));
15577 base = XEXP (base, 0);
15579 /* Rework the address into a legal sequence of insns. */
15580 /* Valid range for lo is -4095 -> 4095 */
15581 lo = (offset >= 0
15582 ? (offset & 0xfff)
15583 : -((-offset) & 0xfff));
15585 /* Corner case, if lo is the max offset then we would be out of range
15586 once we have added the additional 1 below, so bump the msb into the
15587 pre-loading insn(s). */
15588 if (lo == 4095)
15589 lo &= 0x7ff;
15591 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15592 ^ (HOST_WIDE_INT) 0x80000000)
15593 - (HOST_WIDE_INT) 0x80000000);
15595 gcc_assert (hi + lo == offset);
15597 if (hi != 0)
15599 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15601 /* Be careful not to destroy OUTVAL. */
15602 if (reg_overlap_mentioned_p (base_plus, outval))
15604 /* Updating base_plus might destroy outval, see if we
15605 can swap the scratch and base_plus. */
15606 if (!reg_overlap_mentioned_p (scratch, outval))
15607 std::swap (scratch, base_plus);
15608 else
15610 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15612 /* Be conservative and copy outval into scratch now,
15613 this should only be necessary if outval is a
15614 subreg of something larger than a word. */
15615 /* XXX Might this clobber base? I can't see how it
15616 can, since scratch is known to overlap with
15617 outval. */
15618 emit_insn (gen_movhi (scratch_hi, outval));
15619 outval = scratch_hi;
15623 /* Get the base address; addsi3 knows how to handle constants
15624 that require more than one insn. */
15625 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15626 base = base_plus;
15627 offset = lo;
15631 if (BYTES_BIG_ENDIAN)
15633 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15634 plus_constant (Pmode, base,
15635 offset + 1)),
15636 gen_lowpart (QImode, outval)));
15637 emit_insn (gen_lshrsi3 (scratch,
15638 gen_rtx_SUBREG (SImode, outval, 0),
15639 GEN_INT (8)));
15640 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15641 offset)),
15642 gen_lowpart (QImode, scratch)));
15644 else
15646 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15647 offset)),
15648 gen_lowpart (QImode, outval)));
15649 emit_insn (gen_lshrsi3 (scratch,
15650 gen_rtx_SUBREG (SImode, outval, 0),
15651 GEN_INT (8)));
15652 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15653 plus_constant (Pmode, base,
15654 offset + 1)),
15655 gen_lowpart (QImode, scratch)));
15659 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15660 (padded to the size of a word) should be passed in a register. */
15662 static bool
15663 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15665 if (TARGET_AAPCS_BASED)
15666 return must_pass_in_stack_var_size (mode, type);
15667 else
15668 return must_pass_in_stack_var_size_or_pad (mode, type);
15672 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15673 Return true if an argument passed on the stack should be padded upwards,
15674 i.e. if the least-significant byte has useful data.
15675 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15676 aggregate types are placed at the lowest memory address. */
15678 bool
15679 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15681 if (!TARGET_AAPCS_BASED)
15682 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15684 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15685 return false;
15687 return true;
15691 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15692 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15693 register has useful data, and return the opposite if the most
15694 significant byte does. */
15696 bool
15697 arm_pad_reg_upward (machine_mode mode,
15698 tree type, int first ATTRIBUTE_UNUSED)
15700 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15702 /* For AAPCS, small aggregates, small fixed-point types,
15703 and small complex types are always padded upwards. */
15704 if (type)
15706 if ((AGGREGATE_TYPE_P (type)
15707 || TREE_CODE (type) == COMPLEX_TYPE
15708 || FIXED_POINT_TYPE_P (type))
15709 && int_size_in_bytes (type) <= 4)
15710 return true;
15712 else
15714 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15715 && GET_MODE_SIZE (mode) <= 4)
15716 return true;
15720 /* Otherwise, use default padding. */
15721 return !BYTES_BIG_ENDIAN;
15724 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15725 assuming that the address in the base register is word aligned. */
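/* For reference: ARM-state LDRD/STRD take an immediate offset in the
   range +/-255, while Thumb-2 allows +/-1020 but requires the offset
   to be a multiple of 4 -- exactly the checks made below.  */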
15726 bool
15727 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15729 HOST_WIDE_INT max_offset;
15731 /* Offset must be a multiple of 4 in Thumb mode. */
15732 if (TARGET_THUMB2 && ((offset & 3) != 0))
15733 return false;
15735 if (TARGET_THUMB2)
15736 max_offset = 1020;
15737 else if (TARGET_ARM)
15738 max_offset = 255;
15739 else
15740 return false;
15742 return ((offset <= max_offset) && (offset >= -max_offset));
15745 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15746 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15747 Assumes that the address in the base register RN is word aligned. Pattern
15748 guarantees that both memory accesses use the same base register,
15749 the offsets are constants within the range, and the gap between the offsets is 4.
15750 If reload has completed, also check that the registers are legal. WBACK indicates
15751 whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
15752 bool
15753 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15754 bool wback, bool load)
15756 unsigned int t, t2, n;
15758 if (!reload_completed)
15759 return true;
15761 if (!offset_ok_for_ldrd_strd (offset))
15762 return false;
15764 t = REGNO (rt);
15765 t2 = REGNO (rt2);
15766 n = REGNO (rn);
15768 if ((TARGET_THUMB2)
15769 && ((wback && (n == t || n == t2))
15770 || (t == SP_REGNUM)
15771 || (t == PC_REGNUM)
15772 || (t2 == SP_REGNUM)
15773 || (t2 == PC_REGNUM)
15774 || (!load && (n == PC_REGNUM))
15775 || (load && (t == t2))
15776 /* Triggers Cortex-M3 LDRD errata. */
15777 || (!wback && load && fix_cm3_ldrd && (n == t))))
15778 return false;
15780 if ((TARGET_ARM)
15781 && ((wback && (n == t || n == t2))
15782 || (t2 == PC_REGNUM)
15783 || (t % 2 != 0) /* First destination register is not even. */
15784 || (t2 != t + 1)
15785 /* PC can be used as base register (for offset addressing only),
15786 but it is deprecated. */
15787 || (n == PC_REGNUM)))
15788 return false;
15790 return true;
15793 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15794 operand MEM's address contains an immediate offset from the base
15795 register and has no side effects, in which case it sets BASE and
15796 OFFSET accordingly. */
15797 static bool
15798 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15800 rtx addr;
15802 gcc_assert (base != NULL && offset != NULL);
15804 /* TODO: Handle more general memory operand patterns, such as
15805 PRE_DEC and PRE_INC. */
15807 if (side_effects_p (mem))
15808 return false;
15810 /* Can't deal with subregs. */
15811 if (GET_CODE (mem) == SUBREG)
15812 return false;
15814 gcc_assert (MEM_P (mem));
15816 *offset = const0_rtx;
15818 addr = XEXP (mem, 0);
15820 /* If addr isn't valid for DImode, then we can't handle it. */
15821 if (!arm_legitimate_address_p (DImode, addr,
15822 reload_in_progress || reload_completed))
15823 return false;
15825 if (REG_P (addr))
15827 *base = addr;
15828 return true;
15830 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15832 *base = XEXP (addr, 0);
15833 *offset = XEXP (addr, 1);
15834 return (REG_P (*base) && CONST_INT_P (*offset));
15837 return false;
15840 /* Called from a peephole2 to replace two word-size accesses with a
15841 single LDRD/STRD instruction. Returns true iff we can generate a
15842 new instruction sequence. That is, both accesses use the same base
15843 register and the gap between constant offsets is 4. This function
15844 may reorder its operands to match ldrd/strd RTL templates.
15845 OPERANDS are the operands found by the peephole matcher;
15846 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15847 corresponding memory operands. LOAD indicates whether the access
15848 is load or store. CONST_STORE indicates a store of constant
15849 integer values held in OPERANDS[4,5] and assumes that the pattern
15850 is 4 insns long, for the purpose of checking dead registers.
15851 COMMUTE indicates that register operands may be reordered. */
15852 bool
15853 gen_operands_ldrd_strd (rtx *operands, bool load,
15854 bool const_store, bool commute)
15856 int nops = 2;
15857 HOST_WIDE_INT offsets[2], offset;
15858 rtx base = NULL_RTX;
15859 rtx cur_base, cur_offset, tmp;
15860 int i, gap;
15861 HARD_REG_SET regset;
15863 gcc_assert (!const_store || !load);
15864 /* Check that the memory references are immediate offsets from the
15865 same base register. Extract the base register, the destination
15866 registers, and the corresponding memory offsets. */
15867 for (i = 0; i < nops; i++)
15869 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15870 return false;
15872 if (i == 0)
15873 base = cur_base;
15874 else if (REGNO (base) != REGNO (cur_base))
15875 return false;
15877 offsets[i] = INTVAL (cur_offset);
15878 if (GET_CODE (operands[i]) == SUBREG)
15880 tmp = SUBREG_REG (operands[i]);
15881 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15882 operands[i] = tmp;
15886 /* Make sure there is no dependency between the individual loads. */
15887 if (load && REGNO (operands[0]) == REGNO (base))
15888 return false; /* RAW */
15890 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15891 return false; /* WAW */
15893 /* If the same input register is used in both stores
15894 when storing different constants, try to find a free register.
15895 For example, the code
15896 mov r0, 0
15897 str r0, [r2]
15898 mov r0, 1
15899 str r0, [r2, #4]
15900 can be transformed into
15901 mov r1, 0
15902 strd r1, r0, [r2]
15903 in Thumb mode assuming that r1 is free. */
15904 if (const_store
15905 && REGNO (operands[0]) == REGNO (operands[1])
15906 && INTVAL (operands[4]) != INTVAL (operands[5]))
15908 if (TARGET_THUMB2)
15910 CLEAR_HARD_REG_SET (regset);
15911 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15912 if (tmp == NULL_RTX)
15913 return false;
15915 /* Use the new register in the first load to ensure that
15916 if the original input register is not dead after peephole,
15917 then it will have the correct constant value. */
15918 operands[0] = tmp;
15920 else if (TARGET_ARM)
15922 return false;
15923 int regno = REGNO (operands[0]);
15924 if (!peep2_reg_dead_p (4, operands[0]))
15926 /* When the input register is even and is not dead after the
15927 pattern, it has to hold the second constant but we cannot
15928 form a legal STRD in ARM mode with this register as the second
15929 register. */
15930 if (regno % 2 == 0)
15931 return false;
15933 /* Is regno-1 free? */
15934 SET_HARD_REG_SET (regset);
15935 CLEAR_HARD_REG_BIT(regset, regno - 1);
15936 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15937 if (tmp == NULL_RTX)
15938 return false;
15940 operands[0] = tmp;
15942 else
15944 /* Find a DImode register. */
15945 CLEAR_HARD_REG_SET (regset);
15946 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15947 if (tmp != NULL_RTX)
15949 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15950 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15952 else
15954 /* Can we use the input register to form a DI register? */
15955 SET_HARD_REG_SET (regset);
15956 CLEAR_HARD_REG_BIT(regset,
15957 regno % 2 == 0 ? regno + 1 : regno - 1);
15958 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15959 if (tmp == NULL_RTX)
15960 return false;
15961 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15965 gcc_assert (operands[0] != NULL_RTX);
15966 gcc_assert (operands[1] != NULL_RTX);
15967 gcc_assert (REGNO (operands[0]) % 2 == 0);
15968 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15972 /* Make sure the instructions are ordered with lower memory access first. */
15973 if (offsets[0] > offsets[1])
15975 gap = offsets[0] - offsets[1];
15976 offset = offsets[1];
15978 /* Swap the instructions such that lower memory is accessed first. */
15979 std::swap (operands[0], operands[1]);
15980 std::swap (operands[2], operands[3]);
15981 if (const_store)
15982 std::swap (operands[4], operands[5]);
15984 else
15986 gap = offsets[1] - offsets[0];
15987 offset = offsets[0];
15990 /* Make sure accesses are to consecutive memory locations. */
15991 if (gap != 4)
15992 return false;
15994 /* Make sure we generate legal instructions. */
15995 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15996 false, load))
15997 return true;
15999 /* In Thumb state, where registers are almost unconstrained, there
16000 is little hope of fixing it. */
16001 if (TARGET_THUMB2)
16002 return false;
16004 if (load && commute)
16006 /* Try reordering registers. */
16007 std::swap (operands[0], operands[1]);
16008 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16009 false, load))
16010 return true;
16013 if (const_store)
16015 /* If input registers are dead after this pattern, they can be
16016 reordered or replaced by other registers that are free in the
16017 current pattern. */
16018 if (!peep2_reg_dead_p (4, operands[0])
16019 || !peep2_reg_dead_p (4, operands[1]))
16020 return false;
16022 /* Try to reorder the input registers. */
16023 /* For example, the code
16024 mov r0, 0
16025 mov r1, 1
16026 str r1, [r2]
16027 str r0, [r2, #4]
16028 can be transformed into
16029 mov r1, 0
16030 mov r0, 1
16031 strd r0, [r2]
16033 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16034 false, false))
16036 std::swap (operands[0], operands[1]);
16037 return true;
16040 /* Try to find a free DI register. */
16041 CLEAR_HARD_REG_SET (regset);
16042 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16043 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16044 while (true)
16046 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16047 if (tmp == NULL_RTX)
16048 return false;
16050 /* DREG must be an even-numbered register in DImode.
16051 Split it into SI registers. */
16052 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16053 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16054 gcc_assert (operands[0] != NULL_RTX);
16055 gcc_assert (operands[1] != NULL_RTX);
16056 gcc_assert (REGNO (operands[0]) % 2 == 0);
16057 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16059 return (operands_ok_ldrd_strd (operands[0], operands[1],
16060 base, offset,
16061 false, load));
16065 return false;
16071 /* Print a symbolic form of X to the debug file, F. */
16072 static void
16073 arm_print_value (FILE *f, rtx x)
16075 switch (GET_CODE (x))
16077 case CONST_INT:
16078 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16079 return;
16081 case CONST_DOUBLE:
16082 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16083 return;
16085 case CONST_VECTOR:
16087 int i;
16089 fprintf (f, "<");
16090 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16092 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16093 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16094 fputc (',', f);
16096 fprintf (f, ">");
16098 return;
16100 case CONST_STRING:
16101 fprintf (f, "\"%s\"", XSTR (x, 0));
16102 return;
16104 case SYMBOL_REF:
16105 fprintf (f, "`%s'", XSTR (x, 0));
16106 return;
16108 case LABEL_REF:
16109 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16110 return;
16112 case CONST:
16113 arm_print_value (f, XEXP (x, 0));
16114 return;
16116 case PLUS:
16117 arm_print_value (f, XEXP (x, 0));
16118 fprintf (f, "+");
16119 arm_print_value (f, XEXP (x, 1));
16120 return;
16122 case PC:
16123 fprintf (f, "pc");
16124 return;
16126 default:
16127 fprintf (f, "????");
16128 return;
16132 /* Routines for manipulation of the constant pool. */
16134 /* Arm instructions cannot load a large constant directly into a
16135 register; they have to come from a pc relative load. The constant
16136 must therefore be placed in the addressable range of the pc
16137 relative load. Depending on the precise pc relative load
16138 instruction the range is somewhere between 256 bytes and 4k. This
16139 means that we often have to dump a constant inside a function, and
16140 generate code to branch around it.
16142 It is important to minimize this, since the branches will slow
16143 things down and make the code larger.
16145 Normally we can hide the table after an existing unconditional
16146 branch so that there is no interruption of the flow, but in the
16147 worst case the code looks like this:
16149 ldr rn, L1
16151 b L2
16152 align
16153 L1: .long value
16157 ldr rn, L3
16159 b L4
16160 align
16161 L3: .long value
16165 We fix this by performing a scan after scheduling, which notices
16166 which instructions need to have their operands fetched from the
16167 constant table and builds the table.
16169 The algorithm starts by building a table of all the constants that
16170 need fixing up and all the natural barriers in the function (places
16171 where a constant table can be dropped without breaking the flow).
16172 For each fixup we note how far the pc-relative replacement will be
16173 able to reach and the offset of the instruction into the function.
16175 Having built the table we then group the fixes together to form
16176 tables that are as large as possible (subject to addressing
16177 constraints) and emit each table of constants after the last
16178 barrier that is within range of all the instructions in the group.
16179 If a group does not contain a barrier, then we forcibly create one
16180 by inserting a jump instruction into the flow. Once the table has
16181 been inserted, the insns are then modified to reference the
16182 relevant entry in the pool.
16184 Possible enhancements to the algorithm (not implemented) are:
16186 1) For some processors and object formats, there may be benefit in
16187 aligning the pools to the start of cache lines; this alignment
16188 would need to be taken into account when calculating addressability
16189 of a pool. */
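/* As a rough guide to the ranges involved: an ARM-state LDR of a
   literal reaches about +/-4KB from the pc, a Thumb-1 pc-relative LDR
   only about 1KB forwards, and VFP loads about +/-1KB.  The exact
   per-insn limits are taken from the pool_range and neg_pool_range
   attributes when the fixups are recorded.  */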
16191 /* These typedefs are located at the start of this file, so that
16192 they can be used in the prototypes there. This comment is to
16193 remind readers of that fact so that the following structures
16194 can be understood more easily.
16196 typedef struct minipool_node Mnode;
16197 typedef struct minipool_fixup Mfix; */
16199 struct minipool_node
16201 /* Doubly linked chain of entries. */
16202 Mnode * next;
16203 Mnode * prev;
16204 /* The maximum offset into the code that this entry can be placed. While
16205 pushing fixes for forward references, all entries are sorted in order
16206 of increasing max_address. */
16207 HOST_WIDE_INT max_address;
16208 /* Similarly for an entry inserted for a backwards ref. */
16209 HOST_WIDE_INT min_address;
16210 /* The number of fixes referencing this entry. This can become zero
16211 if we "unpush" an entry. In this case we ignore the entry when we
16212 come to emit the code. */
16213 int refcount;
16214 /* The offset from the start of the minipool. */
16215 HOST_WIDE_INT offset;
16216 /* The value in table. */
16217 rtx value;
16218 /* The mode of value. */
16219 machine_mode mode;
16220 /* The size of the value. With iWMMXt enabled
16221 sizes > 4 also imply an alignment of 8 bytes. */
16222 int fix_size;
16225 struct minipool_fixup
16227 Mfix * next;
16228 rtx_insn * insn;
16229 HOST_WIDE_INT address;
16230 rtx * loc;
16231 machine_mode mode;
16232 int fix_size;
16233 rtx value;
16234 Mnode * minipool;
16235 HOST_WIDE_INT forwards;
16236 HOST_WIDE_INT backwards;
16239 /* Fixes less than a word need padding out to a word boundary. */
16240 #define MINIPOOL_FIX_SIZE(mode) \
16241 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
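/* E.g. MINIPOOL_FIX_SIZE (HImode) == 4 (padded up from 2),
   MINIPOOL_FIX_SIZE (DImode) == 8.  */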
16243 static Mnode * minipool_vector_head;
16244 static Mnode * minipool_vector_tail;
16245 static rtx_code_label *minipool_vector_label;
16246 static int minipool_pad;
16248 /* The linked list of all minipool fixes required for this function. */
16249 Mfix * minipool_fix_head;
16250 Mfix * minipool_fix_tail;
16251 /* The fix entry for the current minipool, once it has been placed. */
16252 Mfix * minipool_barrier;
16254 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16255 #define JUMP_TABLES_IN_TEXT_SECTION 0
16256 #endif
16258 static HOST_WIDE_INT
16259 get_jump_table_size (rtx_jump_table_data *insn)
16261 /* ADDR_VECs only take room if read-only data goes into the text
16262 section. */
16263 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16265 rtx body = PATTERN (insn);
16266 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16267 HOST_WIDE_INT size;
16268 HOST_WIDE_INT modesize;
16270 modesize = GET_MODE_SIZE (GET_MODE (body));
16271 size = modesize * XVECLEN (body, elt);
16272 switch (modesize)
16274 case 1:
16275 /* Round up size of TBB table to a halfword boundary. */
16276 size = (size + 1) & ~(HOST_WIDE_INT)1;
16277 break;
16278 case 2:
16279 /* No padding necessary for TBH. */
16280 break;
16281 case 4:
16282 /* Add two bytes for alignment on Thumb. */
16283 if (TARGET_THUMB)
16284 size += 2;
16285 break;
16286 default:
16287 gcc_unreachable ();
16289 return size;
16292 return 0;
16295 /* Return the maximum amount of padding that will be inserted before
16296 label LABEL. */
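/* For example, a label aligned to an 8-byte boundary on Thumb
   (minimum insn size 2) can be preceded by up to 6 bytes of padding.  */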
16298 static HOST_WIDE_INT
16299 get_label_padding (rtx label)
16301 HOST_WIDE_INT align, min_insn_size;
16303 align = 1 << label_to_alignment (label);
16304 min_insn_size = TARGET_THUMB ? 2 : 4;
16305 return align > min_insn_size ? align - min_insn_size : 0;
16308 /* Move a minipool fix MP from its current location to before MAX_MP.
16309 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16310 constraints may need updating. */
16311 static Mnode *
16312 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16313 HOST_WIDE_INT max_address)
16315 /* The code below assumes these are different. */
16316 gcc_assert (mp != max_mp);
16318 if (max_mp == NULL)
16320 if (max_address < mp->max_address)
16321 mp->max_address = max_address;
16323 else
16325 if (max_address > max_mp->max_address - mp->fix_size)
16326 mp->max_address = max_mp->max_address - mp->fix_size;
16327 else
16328 mp->max_address = max_address;
16330 /* Unlink MP from its current position. Since max_mp is non-null,
16331 mp->prev must be non-null. */
16332 mp->prev->next = mp->next;
16333 if (mp->next != NULL)
16334 mp->next->prev = mp->prev;
16335 else
16336 minipool_vector_tail = mp->prev;
16338 /* Re-insert it before MAX_MP. */
16339 mp->next = max_mp;
16340 mp->prev = max_mp->prev;
16341 max_mp->prev = mp;
16343 if (mp->prev != NULL)
16344 mp->prev->next = mp;
16345 else
16346 minipool_vector_head = mp;
16349 /* Save the new entry. */
16350 max_mp = mp;
16352 /* Scan over the preceding entries and adjust their addresses as
16353 required. */
16354 while (mp->prev != NULL
16355 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16357 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16358 mp = mp->prev;
16361 return max_mp;
16364 /* Add a constant to the minipool for a forward reference. Returns the
16365 node added or NULL if the constant will not fit in this pool. */
16366 static Mnode *
16367 add_minipool_forward_ref (Mfix *fix)
16369 /* If set, max_mp is the first pool_entry that has a lower
16370 constraint than the one we are trying to add. */
16371 Mnode * max_mp = NULL;
16372 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16373 Mnode * mp;
16375 /* If the minipool starts before the end of FIX->INSN then this FIX
16376 cannot be placed into the current pool. Furthermore, adding the
16377 new constant pool entry may cause the pool to start FIX_SIZE bytes
16378 earlier. */
16379 if (minipool_vector_head &&
16380 (fix->address + get_attr_length (fix->insn)
16381 >= minipool_vector_head->max_address - fix->fix_size))
16382 return NULL;
16384 /* Scan the pool to see if a constant with the same value has
16385 already been added. While we are doing this, also note the
16386 location where we must insert the constant if it doesn't already
16387 exist. */
16388 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16390 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16391 && fix->mode == mp->mode
16392 && (!LABEL_P (fix->value)
16393 || (CODE_LABEL_NUMBER (fix->value)
16394 == CODE_LABEL_NUMBER (mp->value)))
16395 && rtx_equal_p (fix->value, mp->value))
16397 /* More than one fix references this entry. */
16398 mp->refcount++;
16399 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16402 /* Note the insertion point if necessary. */
16403 if (max_mp == NULL
16404 && mp->max_address > max_address)
16405 max_mp = mp;
16407 /* If we are inserting an 8-byte aligned quantity and
16408 we have not already found an insertion point, then
16409 make sure that all such 8-byte aligned quantities are
16410 placed at the start of the pool. */
16411 if (ARM_DOUBLEWORD_ALIGN
16412 && max_mp == NULL
16413 && fix->fix_size >= 8
16414 && mp->fix_size < 8)
16416 max_mp = mp;
16417 max_address = mp->max_address;
16421 /* The value is not currently in the minipool, so we need to create
16422 a new entry for it. If MAX_MP is NULL, the entry will be put on
16423 the end of the list since the placement is less constrained than
16424 any existing entry. Otherwise, we insert the new fix before
16425 MAX_MP and, if necessary, adjust the constraints on the other
16426 entries. */
16427 mp = XNEW (Mnode);
16428 mp->fix_size = fix->fix_size;
16429 mp->mode = fix->mode;
16430 mp->value = fix->value;
16431 mp->refcount = 1;
16432 /* Not yet required for a backwards ref. */
16433 mp->min_address = -65536;
16435 if (max_mp == NULL)
16437 mp->max_address = max_address;
16438 mp->next = NULL;
16439 mp->prev = minipool_vector_tail;
16441 if (mp->prev == NULL)
16443 minipool_vector_head = mp;
16444 minipool_vector_label = gen_label_rtx ();
16446 else
16447 mp->prev->next = mp;
16449 minipool_vector_tail = mp;
16451 else
16453 if (max_address > max_mp->max_address - mp->fix_size)
16454 mp->max_address = max_mp->max_address - mp->fix_size;
16455 else
16456 mp->max_address = max_address;
16458 mp->next = max_mp;
16459 mp->prev = max_mp->prev;
16460 max_mp->prev = mp;
16461 if (mp->prev != NULL)
16462 mp->prev->next = mp;
16463 else
16464 minipool_vector_head = mp;
16467 /* Save the new entry. */
16468 max_mp = mp;
16470 /* Scan over the preceding entries and adjust their addresses as
16471 required. */
16472 while (mp->prev != NULL
16473 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16475 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16476 mp = mp->prev;
16479 return max_mp;
16482 static Mnode *
16483 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16484 HOST_WIDE_INT min_address)
16486 HOST_WIDE_INT offset;
16488 /* The code below assumes these are different. */
16489 gcc_assert (mp != min_mp);
16491 if (min_mp == NULL)
16493 if (min_address > mp->min_address)
16494 mp->min_address = min_address;
16496 else
16498 /* We will adjust this below if it is too loose. */
16499 mp->min_address = min_address;
16501 /* Unlink MP from its current position. Since min_mp is non-null,
16502 mp->next must be non-null. */
16503 mp->next->prev = mp->prev;
16504 if (mp->prev != NULL)
16505 mp->prev->next = mp->next;
16506 else
16507 minipool_vector_head = mp->next;
16509 /* Reinsert it after MIN_MP. */
16510 mp->prev = min_mp;
16511 mp->next = min_mp->next;
16512 min_mp->next = mp;
16513 if (mp->next != NULL)
16514 mp->next->prev = mp;
16515 else
16516 minipool_vector_tail = mp;
16519 min_mp = mp;
16521 offset = 0;
16522 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16524 mp->offset = offset;
16525 if (mp->refcount > 0)
16526 offset += mp->fix_size;
16528 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16529 mp->next->min_address = mp->min_address + mp->fix_size;
16532 return min_mp;
16535 /* Add a constant to the minipool for a backward reference. Returns the
16536 node added or NULL if the constant will not fit in this pool.
16538 Note that the code for insertion for a backwards reference can be
16539 somewhat confusing because the calculated offsets for each fix do
16540 not take into account the size of the pool (which is still under
16541 construction). */
16542 static Mnode *
16543 add_minipool_backward_ref (Mfix *fix)
16545 /* If set, min_mp is the last pool_entry that has a lower constraint
16546 than the one we are trying to add. */
16547 Mnode *min_mp = NULL;
16548 /* This can be negative, since it is only a constraint. */
16549 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16550 Mnode *mp;
16552 /* If we can't reach the current pool from this insn, or if we can't
16553 insert this entry at the end of the pool without pushing other
16554 fixes out of range, then we don't try. This ensures that we
16555 can't fail later on. */
16556 if (min_address >= minipool_barrier->address
16557 || (minipool_vector_tail->min_address + fix->fix_size
16558 >= minipool_barrier->address))
16559 return NULL;
16561 /* Scan the pool to see if a constant with the same value has
16562 already been added. While we are doing this, also note the
16563 location where we must insert the constant if it doesn't already
16564 exist. */
16565 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16567 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16568 && fix->mode == mp->mode
16569 && (!LABEL_P (fix->value)
16570 || (CODE_LABEL_NUMBER (fix->value)
16571 == CODE_LABEL_NUMBER (mp->value)))
16572 && rtx_equal_p (fix->value, mp->value)
16573 /* Check that there is enough slack to move this entry to the
16574 end of the table (this is conservative). */
16575 && (mp->max_address
16576 > (minipool_barrier->address
16577 + minipool_vector_tail->offset
16578 + minipool_vector_tail->fix_size)))
16580 mp->refcount++;
16581 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16584 if (min_mp != NULL)
16585 mp->min_address += fix->fix_size;
16586 else
16588 /* Note the insertion point if necessary. */
16589 if (mp->min_address < min_address)
16591 /* For now, we do not allow the insertion of nodes requiring 8-byte
16592 alignment anywhere but at the start of the pool. */
16593 if (ARM_DOUBLEWORD_ALIGN
16594 && fix->fix_size >= 8 && mp->fix_size < 8)
16595 return NULL;
16596 else
16597 min_mp = mp;
16599 else if (mp->max_address
16600 < minipool_barrier->address + mp->offset + fix->fix_size)
16602 /* Inserting before this entry would push the fix beyond
16603 its maximum address (which can happen if we have
16604 re-located a forwards fix); force the new fix to come
16605 after it. */
16606 if (ARM_DOUBLEWORD_ALIGN
16607 && fix->fix_size >= 8 && mp->fix_size < 8)
16608 return NULL;
16609 else
16611 min_mp = mp;
16612 min_address = mp->min_address + fix->fix_size;
16615 /* Do not insert a non-8-byte aligned quantity before 8-byte
16616 aligned quantities. */
16617 else if (ARM_DOUBLEWORD_ALIGN
16618 && fix->fix_size < 8
16619 && mp->fix_size >= 8)
16621 min_mp = mp;
16622 min_address = mp->min_address + fix->fix_size;
16627 /* We need to create a new entry. */
16628 mp = XNEW (Mnode);
16629 mp->fix_size = fix->fix_size;
16630 mp->mode = fix->mode;
16631 mp->value = fix->value;
16632 mp->refcount = 1;
16633 mp->max_address = minipool_barrier->address + 65536;
16635 mp->min_address = min_address;
16637 if (min_mp == NULL)
16639 mp->prev = NULL;
16640 mp->next = minipool_vector_head;
16642 if (mp->next == NULL)
16644 minipool_vector_tail = mp;
16645 minipool_vector_label = gen_label_rtx ();
16647 else
16648 mp->next->prev = mp;
16650 minipool_vector_head = mp;
16652 else
16654 mp->next = min_mp->next;
16655 mp->prev = min_mp;
16656 min_mp->next = mp;
16658 if (mp->next != NULL)
16659 mp->next->prev = mp;
16660 else
16661 minipool_vector_tail = mp;
16664 /* Save the new entry. */
16665 min_mp = mp;
16667 if (mp->prev)
16668 mp = mp->prev;
16669 else
16670 mp->offset = 0;
16672 /* Scan over the following entries and adjust their offsets. */
16673 while (mp->next != NULL)
16675 if (mp->next->min_address < mp->min_address + mp->fix_size)
16676 mp->next->min_address = mp->min_address + mp->fix_size;
16678 if (mp->refcount)
16679 mp->next->offset = mp->offset + mp->fix_size;
16680 else
16681 mp->next->offset = mp->offset;
16683 mp = mp->next;
16686 return min_mp;
16689 static void
16690 assign_minipool_offsets (Mfix *barrier)
16692 HOST_WIDE_INT offset = 0;
16693 Mnode *mp;
16695 minipool_barrier = barrier;
16697 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16699 mp->offset = offset;
16701 if (mp->refcount > 0)
16702 offset += mp->fix_size;
16706 /* Output the literal table */
16707 static void
16708 dump_minipool (rtx_insn *scan)
16710 Mnode * mp;
16711 Mnode * nmp;
16712 int align64 = 0;
16714 if (ARM_DOUBLEWORD_ALIGN)
16715 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16716 if (mp->refcount > 0 && mp->fix_size >= 8)
16718 align64 = 1;
16719 break;
16722 if (dump_file)
16723 fprintf (dump_file,
16724 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16725 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16727 scan = emit_label_after (gen_label_rtx (), scan);
16728 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16729 scan = emit_label_after (minipool_vector_label, scan);
16731 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16733 if (mp->refcount > 0)
16735 if (dump_file)
16737 fprintf (dump_file,
16738 ";; Offset %u, min %ld, max %ld ",
16739 (unsigned) mp->offset, (unsigned long) mp->min_address,
16740 (unsigned long) mp->max_address);
16741 arm_print_value (dump_file, mp->value);
16742 fputc ('\n', dump_file);
16745 switch (GET_MODE_SIZE (mp->mode))
16747 #ifdef HAVE_consttable_1
16748 case 1:
16749 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16750 break;
16752 #endif
16753 #ifdef HAVE_consttable_2
16754 case 2:
16755 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16756 break;
16758 #endif
16759 #ifdef HAVE_consttable_4
16760 case 4:
16761 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16762 break;
16764 #endif
16765 #ifdef HAVE_consttable_8
16766 case 8:
16767 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16768 break;
16770 #endif
16771 #ifdef HAVE_consttable_16
16772 case 16:
16773 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16774 break;
16776 #endif
16777 default:
16778 gcc_unreachable ();
16782 nmp = mp->next;
16783 free (mp);
16786 minipool_vector_head = minipool_vector_tail = NULL;
16787 scan = emit_insn_after (gen_consttable_end (), scan);
16788 scan = emit_barrier_after (scan);
16791 /* Return the cost of forcibly inserting a barrier after INSN. */
16792 static int
16793 arm_barrier_cost (rtx insn)
16795 /* Basing the location of the pool on the loop depth is preferable,
16796 but at the moment, the basic block information seems to be
16797 corrupted by this stage of the compilation. */
16798 int base_cost = 50;
16799 rtx next = next_nonnote_insn (insn);
16801 if (next != NULL && LABEL_P (next))
16802 base_cost -= 20;
16804 switch (GET_CODE (insn))
16806 case CODE_LABEL:
16807 /* It will always be better to place the table before the label, rather
16808 than after it. */
16809 return 50;
16811 case INSN:
16812 case CALL_INSN:
16813 return base_cost;
16815 case JUMP_INSN:
16816 return base_cost - 10;
16818 default:
16819 return base_cost + 10;
16823 /* Find the best place in the insn stream in the range
16824 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16825 Create the barrier by inserting a jump and add a new fix entry for
16826 it. */
16827 static Mfix *
16828 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16830 HOST_WIDE_INT count = 0;
16831 rtx_barrier *barrier;
16832 rtx_insn *from = fix->insn;
16833 /* The instruction after which we will insert the jump. */
16834 rtx_insn *selected = NULL;
16835 int selected_cost;
16836 /* The address at which the jump instruction will be placed. */
16837 HOST_WIDE_INT selected_address;
16838 Mfix * new_fix;
16839 HOST_WIDE_INT max_count = max_address - fix->address;
16840 rtx_code_label *label = gen_label_rtx ();
16842 selected_cost = arm_barrier_cost (from);
16843 selected_address = fix->address;
16845 while (from && count < max_count)
16847 rtx_jump_table_data *tmp;
16848 int new_cost;
16850 /* This code shouldn't have been called if there was a natural barrier
16851 within range. */
16852 gcc_assert (!BARRIER_P (from));
16854 /* Count the length of this insn. This must stay in sync with the
16855 code that pushes minipool fixes. */
16856 if (LABEL_P (from))
16857 count += get_label_padding (from);
16858 else
16859 count += get_attr_length (from);
16861 /* If there is a jump table, add its length. */
16862 if (tablejump_p (from, NULL, &tmp))
16864 count += get_jump_table_size (tmp);
16866 /* Jump tables aren't in a basic block, so base the cost on
16867 the dispatch insn. If we select this location, we will
16868 still put the pool after the table. */
16869 new_cost = arm_barrier_cost (from);
16871 if (count < max_count
16872 && (!selected || new_cost <= selected_cost))
16874 selected = tmp;
16875 selected_cost = new_cost;
16876 selected_address = fix->address + count;
16879 /* Continue after the dispatch table. */
16880 from = NEXT_INSN (tmp);
16881 continue;
16884 new_cost = arm_barrier_cost (from);
16886 if (count < max_count
16887 && (!selected || new_cost <= selected_cost))
16889 selected = from;
16890 selected_cost = new_cost;
16891 selected_address = fix->address + count;
16894 from = NEXT_INSN (from);
16897 /* Make sure that we found a place to insert the jump. */
16898 gcc_assert (selected);
16900 /* Make sure we do not split a call and its corresponding
16901 CALL_ARG_LOCATION note. */
16902 if (CALL_P (selected))
16904 rtx_insn *next = NEXT_INSN (selected);
16905 if (next && NOTE_P (next)
16906 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16907 selected = next;
16910 /* Create a new JUMP_INSN that branches around a barrier. */
16911 from = emit_jump_insn_after (gen_jump (label), selected);
16912 JUMP_LABEL (from) = label;
16913 barrier = emit_barrier_after (from);
16914 emit_label_after (label, barrier);
16916 /* Create a minipool barrier entry for the new barrier. */
16917 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16918 new_fix->insn = barrier;
16919 new_fix->address = selected_address;
16920 new_fix->next = fix->next;
16921 fix->next = new_fix;
16923 return new_fix;
16926 /* Record that there is a natural barrier in the insn stream at
16927 ADDRESS. */
16928 static void
16929 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16931 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16933 fix->insn = insn;
16934 fix->address = address;
16936 fix->next = NULL;
16937 if (minipool_fix_head != NULL)
16938 minipool_fix_tail->next = fix;
16939 else
16940 minipool_fix_head = fix;
16942 minipool_fix_tail = fix;
16945 /* Record INSN, which will need fixing up to load a value from the
16946 minipool. ADDRESS is the offset of the insn since the start of the
16947 function; LOC is a pointer to the part of the insn which requires
16948 fixing; VALUE is the constant that must be loaded, which is of type
16949 MODE. */
16950 static void
16951 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16952 machine_mode mode, rtx value)
16954 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16956 fix->insn = insn;
16957 fix->address = address;
16958 fix->loc = loc;
16959 fix->mode = mode;
16960 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16961 fix->value = value;
16962 fix->forwards = get_attr_pool_range (insn);
16963 fix->backwards = get_attr_neg_pool_range (insn);
16964 fix->minipool = NULL;
16966 /* If an insn doesn't have a range defined for it, then it isn't
16967 expecting to be reworked by this code. Better to stop now than
16968 to generate duff assembly code. */
16969 gcc_assert (fix->forwards || fix->backwards);
16971 /* If an entry requires 8-byte alignment then assume all constant pools
16972 require 4 bytes of padding. Trying to do this later on a per-pool
16973 basis is awkward because existing pool entries have to be modified. */
16974 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16975 minipool_pad = 4;
16977 if (dump_file)
16979 fprintf (dump_file,
16980 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16981 GET_MODE_NAME (mode),
16982 INSN_UID (insn), (unsigned long) address,
16983 -1 * (long)fix->backwards, (long)fix->forwards);
16984 arm_print_value (dump_file, fix->value);
16985 fprintf (dump_file, "\n");
16988 /* Add it to the chain of fixes. */
16989 fix->next = NULL;
16991 if (minipool_fix_head != NULL)
16992 minipool_fix_tail->next = fix;
16993 else
16994 minipool_fix_head = fix;
16996 minipool_fix_tail = fix;
16999 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
17000 constant inline. Returns 99 if we always want the value synthesized
17001 rather than loaded from a literal pool. */
17003 arm_max_const_double_inline_cost ()
17005 /* Let the value get synthesized to avoid the use of literal pools. */
17006 if (arm_disable_literal_pool)
17007 return 99;
17009 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17012 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17013 Returns the number of insns needed, or 99 if we don't know how to
17014 do it. */
17016 arm_const_double_inline_cost (rtx val)
17018 rtx lowpart, highpart;
17019 machine_mode mode;
17021 mode = GET_MODE (val);
17023 if (mode == VOIDmode)
17024 mode = DImode;
17026 gcc_assert (GET_MODE_SIZE (mode) == 8);
17028 lowpart = gen_lowpart (SImode, val);
17029 highpart = gen_highpart_mode (SImode, mode, val);
17031 gcc_assert (CONST_INT_P (lowpart));
17032 gcc_assert (CONST_INT_P (highpart));
17034 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17035 NULL_RTX, NULL_RTX, 0, 0)
17036 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17037 NULL_RTX, NULL_RTX, 0, 0));
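/* For illustration: a constant such as 0x0000000100000001 costs
   1 + 1 = 2 insns here (one MOV per word), which is within the limit
   returned by arm_max_const_double_inline_cost, so callers would
   normally synthesize it inline instead of using a literal pool.  */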
17040 /* Cost of loading a SImode constant. */
17041 static inline int
17042 arm_const_inline_cost (enum rtx_code code, rtx val)
17044 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17045 NULL_RTX, NULL_RTX, 1, 0);
17048 /* Return true if it is worthwhile to split a 64-bit constant into two
17049 32-bit operations. This is the case if optimizing for size, or
17050 if we have load delay slots, or if one 32-bit part can be done with
17051 a single data operation. */
17052 bool
17053 arm_const_double_by_parts (rtx val)
17055 machine_mode mode = GET_MODE (val);
17056 rtx part;
17058 if (optimize_size || arm_ld_sched)
17059 return true;
17061 if (mode == VOIDmode)
17062 mode = DImode;
17064 part = gen_highpart_mode (SImode, mode, val);
17066 gcc_assert (CONST_INT_P (part));
17068 if (const_ok_for_arm (INTVAL (part))
17069 || const_ok_for_arm (~INTVAL (part)))
17070 return true;
17072 part = gen_lowpart (SImode, val);
17074 gcc_assert (CONST_INT_P (part));
17076 if (const_ok_for_arm (INTVAL (part))
17077 || const_ok_for_arm (~INTVAL (part)))
17078 return true;
17080 return false;
17083 /* Return true if it is possible to inline both the high and low parts
17084 of a 64-bit constant into 32-bit data processing instructions. */
17085 bool
17086 arm_const_double_by_immediates (rtx val)
17088 machine_mode mode = GET_MODE (val);
17089 rtx part;
17091 if (mode == VOIDmode)
17092 mode = DImode;
17094 part = gen_highpart_mode (SImode, mode, val);
17096 gcc_assert (CONST_INT_P (part));
17098 if (!const_ok_for_arm (INTVAL (part)))
17099 return false;
17101 part = gen_lowpart (SImode, val);
17103 gcc_assert (CONST_INT_P (part));
17105 if (!const_ok_for_arm (INTVAL (part)))
17106 return false;
17108 return true;
17111 /* Scan INSN and note any of its operands that need fixing.
17112 If DO_PUSHES is false we do not actually push any of the fixups
17113 needed. */
17114 static void
17115 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17117 int opno;
17119 extract_constrain_insn (insn);
17121 if (recog_data.n_alternatives == 0)
17122 return;
17124 /* Fill in recog_op_alt with information about the constraints of
17125 this insn. */
17126 preprocess_constraints (insn);
17128 const operand_alternative *op_alt = which_op_alt ();
17129 for (opno = 0; opno < recog_data.n_operands; opno++)
17131 /* Things we need to fix can only occur in inputs. */
17132 if (recog_data.operand_type[opno] != OP_IN)
17133 continue;
17135 /* If this alternative is a memory reference, then any mention
17136 of constants in this alternative is really to fool reload
17137 into allowing us to accept one there. We need to fix them up
17138 now so that we output the right code. */
17139 if (op_alt[opno].memory_ok)
17141 rtx op = recog_data.operand[opno];
17143 if (CONSTANT_P (op))
17145 if (do_pushes)
17146 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17147 recog_data.operand_mode[opno], op);
17149 else if (MEM_P (op)
17150 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17151 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17153 if (do_pushes)
17155 rtx cop = avoid_constant_pool_reference (op);
17157 /* Casting the address of something to a mode narrower
17158 than a word can cause avoid_constant_pool_reference()
17159 to return the pool reference itself. That's no good to
17160 us here. Let's just hope that we can use the
17161 constant pool value directly. */
17162 if (op == cop)
17163 cop = get_pool_constant (XEXP (op, 0));
17165 push_minipool_fix (insn, address,
17166 recog_data.operand_loc[opno],
17167 recog_data.operand_mode[opno], cop);
17174 return;
17177 /* Rewrite move insn into subtract of 0 if the condition codes will
17178 be useful in next conditional jump insn. */
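/* Rough sketch: a "mov r1, r0" followed (eventually) by a compare of
   r0 or r1 against zero and a conditional branch is rewritten so the
   move becomes "subs r1, r0, #0"; the SUBS sets the condition codes,
   letting the later compare-and-branch omit its explicit compare.  */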
17180 static void
17181 thumb1_reorg (void)
17183 basic_block bb;
17185 FOR_EACH_BB_FN (bb, cfun)
17187 rtx dest, src;
17188 rtx pat, op0, set = NULL;
17189 rtx_insn *prev, *insn = BB_END (bb);
17190 bool insn_clobbered = false;
17192 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17193 insn = PREV_INSN (insn);
17195 /* Find the last cbranchsi4_insn in basic block BB. */
17196 if (insn == BB_HEAD (bb)
17197 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17198 continue;
17200 /* Get the register with which we are comparing. */
17201 pat = PATTERN (insn);
17202 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17204 /* Find the first flag setting insn before INSN in basic block BB. */
17205 gcc_assert (insn != BB_HEAD (bb));
17206 for (prev = PREV_INSN (insn);
17207 (!insn_clobbered
17208 && prev != BB_HEAD (bb)
17209 && (NOTE_P (prev)
17210 || DEBUG_INSN_P (prev)
17211 || ((set = single_set (prev)) != NULL
17212 && get_attr_conds (prev) == CONDS_NOCOND)));
17213 prev = PREV_INSN (prev))
17215 if (reg_set_p (op0, prev))
17216 insn_clobbered = true;
17219 /* Skip if op0 is clobbered by insn other than prev. */
17220 if (insn_clobbered)
17221 continue;
17223 if (!set)
17224 continue;
17226 dest = SET_DEST (set);
17227 src = SET_SRC (set);
17228 if (!low_register_operand (dest, SImode)
17229 || !low_register_operand (src, SImode))
17230 continue;
17232 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17233 in INSN. Both src and dest of the move insn are checked. */
17234 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17236 dest = copy_rtx (dest);
17237 src = copy_rtx (src);
17238 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17239 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17240 INSN_CODE (prev) = -1;
17241 /* Set test register in INSN to dest. */
17242 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17243 INSN_CODE (insn) = -1;
17248 /* Convert instructions to their cc-clobbering variant if possible, since
17249 that allows us to use smaller encodings. */
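/* For example, "add r0, r1, r2" needs a 32-bit Thumb-2 encoding, while
   the flag-setting "adds r0, r1, r2" has a 16-bit encoding; when the
   condition codes are dead after the insn we can safely use the
   latter.  */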
17251 static void
17252 thumb2_reorg (void)
17254 basic_block bb;
17255 regset_head live;
17257 INIT_REG_SET (&live);
17259 /* We are freeing block_for_insn in the toplev to keep compatibility
17260 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17261 compute_bb_for_insn ();
17262 df_analyze ();
17264 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17266 FOR_EACH_BB_FN (bb, cfun)
17268 if (current_tune->disparage_flag_setting_t16_encodings
17269 && optimize_bb_for_speed_p (bb))
17270 continue;
17272 rtx_insn *insn;
17273 Convert_Action action = SKIP;
17274 Convert_Action action_for_partial_flag_setting
17275 = (current_tune->disparage_partial_flag_setting_t16_encodings
17276 && optimize_bb_for_speed_p (bb))
17277 ? SKIP : CONV;
17279 COPY_REG_SET (&live, DF_LR_OUT (bb));
17280 df_simulate_initialize_backwards (bb, &live);
17281 FOR_BB_INSNS_REVERSE (bb, insn)
17283 if (NONJUMP_INSN_P (insn)
17284 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17285 && GET_CODE (PATTERN (insn)) == SET)
17287 action = SKIP;
17288 rtx pat = PATTERN (insn);
17289 rtx dst = XEXP (pat, 0);
17290 rtx src = XEXP (pat, 1);
17291 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17293 if (UNARY_P (src) || BINARY_P (src))
17294 op0 = XEXP (src, 0);
17296 if (BINARY_P (src))
17297 op1 = XEXP (src, 1);
17299 if (low_register_operand (dst, SImode))
17301 switch (GET_CODE (src))
17303 case PLUS:
17304 /* Adding two registers and storing the result
17305 in the first source is already a 16-bit
17306 operation. */
17307 if (rtx_equal_p (dst, op0)
17308 && register_operand (op1, SImode))
17309 break;
17311 if (low_register_operand (op0, SImode))
17313 /* ADDS <Rd>,<Rn>,<Rm> */
17314 if (low_register_operand (op1, SImode))
17315 action = CONV;
17316 /* ADDS <Rdn>,#<imm8> */
17317 /* SUBS <Rdn>,#<imm8> */
17318 else if (rtx_equal_p (dst, op0)
17319 && CONST_INT_P (op1)
17320 && IN_RANGE (INTVAL (op1), -255, 255))
17321 action = CONV;
17322 /* ADDS <Rd>,<Rn>,#<imm3> */
17323 /* SUBS <Rd>,<Rn>,#<imm3> */
17324 else if (CONST_INT_P (op1)
17325 && IN_RANGE (INTVAL (op1), -7, 7))
17326 action = CONV;
17328 /* ADCS <Rd>, <Rn> */
17329 else if (GET_CODE (XEXP (src, 0)) == PLUS
17330 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17331 && low_register_operand (XEXP (XEXP (src, 0), 1),
17332 SImode)
17333 && COMPARISON_P (op1)
17334 && cc_register (XEXP (op1, 0), VOIDmode)
17335 && maybe_get_arm_condition_code (op1) == ARM_CS
17336 && XEXP (op1, 1) == const0_rtx)
17337 action = CONV;
17338 break;
17340 case MINUS:
17341 /* RSBS <Rd>,<Rn>,#0
17342 Not handled here: see NEG below. */
17343 /* SUBS <Rd>,<Rn>,#<imm3>
17344 SUBS <Rdn>,#<imm8>
17345 Not handled here: see PLUS above. */
17346 /* SUBS <Rd>,<Rn>,<Rm> */
17347 if (low_register_operand (op0, SImode)
17348 && low_register_operand (op1, SImode))
17349 action = CONV;
17350 break;
17352 case MULT:
17353 /* MULS <Rdm>,<Rn>,<Rdm>
17354 As an exception to the rule, this is only used
17355 when optimizing for size since MULS is slow on all
17356 known implementations. We do not even want to use
17357 MULS in cold code, if optimizing for speed, so we
17358 test the global flag here. */
17359 if (!optimize_size)
17360 break;
17361 /* else fall through. */
17362 case AND:
17363 case IOR:
17364 case XOR:
17365 /* ANDS <Rdn>,<Rm> */
17366 if (rtx_equal_p (dst, op0)
17367 && low_register_operand (op1, SImode))
17368 action = action_for_partial_flag_setting;
17369 else if (rtx_equal_p (dst, op1)
17370 && low_register_operand (op0, SImode))
17371 action = action_for_partial_flag_setting == SKIP
17372 ? SKIP : SWAP_CONV;
17373 break;
17375 case ASHIFTRT:
17376 case ASHIFT:
17377 case LSHIFTRT:
17378 /* ASRS <Rdn>,<Rm> */
17379 /* LSRS <Rdn>,<Rm> */
17380 /* LSLS <Rdn>,<Rm> */
17381 if (rtx_equal_p (dst, op0)
17382 && low_register_operand (op1, SImode))
17383 action = action_for_partial_flag_setting;
17384 /* ASRS <Rd>,<Rm>,#<imm5> */
17385 /* LSRS <Rd>,<Rm>,#<imm5> */
17386 /* LSLS <Rd>,<Rm>,#<imm5> */
17387 else if (low_register_operand (op0, SImode)
17388 && CONST_INT_P (op1)
17389 && IN_RANGE (INTVAL (op1), 0, 31))
17390 action = action_for_partial_flag_setting;
17391 break;
17393 case ROTATERT:
17394 /* RORS <Rdn>,<Rm> */
17395 if (rtx_equal_p (dst, op0)
17396 && low_register_operand (op1, SImode))
17397 action = action_for_partial_flag_setting;
17398 break;
17400 case NOT:
17401 /* MVNS <Rd>,<Rm> */
17402 if (low_register_operand (op0, SImode))
17403 action = action_for_partial_flag_setting;
17404 break;
17406 case NEG:
17407 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17408 if (low_register_operand (op0, SImode))
17409 action = CONV;
17410 break;
17412 case CONST_INT:
17413 /* MOVS <Rd>,#<imm8> */
17414 if (CONST_INT_P (src)
17415 && IN_RANGE (INTVAL (src), 0, 255))
17416 action = action_for_partial_flag_setting;
17417 break;
17419 case REG:
17420 /* MOVS and MOV<c> with registers have different
17421 encodings, so are not relevant here. */
17422 break;
17424 default:
17425 break;
17429 if (action != SKIP)
17431 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17432 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17433 rtvec vec;
17435 if (action == SWAP_CONV)
17437 src = copy_rtx (src);
17438 XEXP (src, 0) = op1;
17439 XEXP (src, 1) = op0;
17440 pat = gen_rtx_SET (VOIDmode, dst, src);
17441 vec = gen_rtvec (2, pat, clobber);
17443 else /* action == CONV */
17444 vec = gen_rtvec (2, pat, clobber);
17446 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17447 INSN_CODE (insn) = -1;
17451 if (NONDEBUG_INSN_P (insn))
17452 df_simulate_one_insn_backwards (bb, insn, &live);
17456 CLEAR_REG_SET (&live);
17459 /* GCC puts the pool in the wrong place for ARM, since we can only
17460 load addresses a limited distance around the pc. We do some
17461 special munging to move the constant pool values to the correct
17462 point in the code. */
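/* A rough illustration of the problem being solved (the label and value
   are invented): a constant that cannot be encoded as an immediate is
   loaded pc-relative from a nearby literal pool,

       ldr     r0, .L42
       ...
   .L42:
       .word   0x12345678

   and .L42 must lie within the limited pc-relative range of the ldr.  The
   code below therefore collects such references ("fixes") and dumps small
   pools ("minipools") after existing barriers, or inserts a branch around
   a newly created pool when no barrier is close enough.  */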
17463 static void
17464 arm_reorg (void)
17466 rtx_insn *insn;
17467 HOST_WIDE_INT address = 0;
17468 Mfix * fix;
17470 if (TARGET_THUMB1)
17471 thumb1_reorg ();
17472 else if (TARGET_THUMB2)
17473 thumb2_reorg ();
17475 /* Ensure all insns that must be split have been split at this point.
17476 Otherwise, the pool placement code below may compute incorrect
17477 insn lengths. Note that when optimizing, all insns have already
17478 been split at this point. */
17479 if (!optimize)
17480 split_all_insns_noflow ();
17482 minipool_fix_head = minipool_fix_tail = NULL;
17484 /* The first insn must always be a note, or the code below won't
17485 scan it properly. */
17486 insn = get_insns ();
17487 gcc_assert (NOTE_P (insn));
17488 minipool_pad = 0;
17490 /* Scan all the insns and record the operands that will need fixing. */
17491 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17493 if (BARRIER_P (insn))
17494 push_minipool_barrier (insn, address);
17495 else if (INSN_P (insn))
17497 rtx_jump_table_data *table;
17499 note_invalid_constants (insn, address, true);
17500 address += get_attr_length (insn);
17502 /* If the insn is a vector jump, add the size of the table
17503 and skip the table. */
17504 if (tablejump_p (insn, NULL, &table))
17506 address += get_jump_table_size (table);
17507 insn = table;
17510 else if (LABEL_P (insn))
17511 /* Add the worst-case padding due to alignment. We don't add
17512 the _current_ padding because the minipool insertions
17513 themselves might change it. */
17514 address += get_label_padding (insn);
17517 fix = minipool_fix_head;
17519 /* Now scan the fixups and perform the required changes. */
17520 while (fix)
17522 Mfix * ftmp;
17523 Mfix * fdel;
17524 Mfix * last_added_fix;
17525 Mfix * last_barrier = NULL;
17526 Mfix * this_fix;
17528 /* Skip any further barriers before the next fix. */
17529 while (fix && BARRIER_P (fix->insn))
17530 fix = fix->next;
17532 /* No more fixes. */
17533 if (fix == NULL)
17534 break;
17536 last_added_fix = NULL;
17538 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17540 if (BARRIER_P (ftmp->insn))
17542 if (ftmp->address >= minipool_vector_head->max_address)
17543 break;
17545 last_barrier = ftmp;
17547 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17548 break;
17550 last_added_fix = ftmp; /* Keep track of the last fix added. */
17553 /* If we found a barrier, drop back to that; any fixes that we
17554 could have reached but come after the barrier will now go in
17555 the next mini-pool. */
17556 if (last_barrier != NULL)
17558 /* Reduce the refcount for those fixes that won't go into this
17559 pool after all. */
17560 for (fdel = last_barrier->next;
17561 fdel && fdel != ftmp;
17562 fdel = fdel->next)
17564 fdel->minipool->refcount--;
17565 fdel->minipool = NULL;
17568 ftmp = last_barrier;
17570 else
17572 /* ftmp is the first fix that we can't fit into this pool and
17573 there are no natural barriers that we could use. Insert a
17574 new barrier in the code somewhere between the previous
17575 fix and this one, and arrange to jump around it. */
17576 HOST_WIDE_INT max_address;
17578 /* The last item on the list of fixes must be a barrier, so
17579 we can never run off the end of the list of fixes without
17580 last_barrier being set. */
17581 gcc_assert (ftmp);
17583 max_address = minipool_vector_head->max_address;
17584 /* Check that there isn't another fix that is in range that
17585 we couldn't fit into this pool because the pool was
17586 already too large: we need to put the pool before such an
17587 instruction. The pool itself may come just after the
17588 fix because create_fix_barrier also allows space for a
17589 jump instruction. */
17590 if (ftmp->address < max_address)
17591 max_address = ftmp->address + 1;
17593 last_barrier = create_fix_barrier (last_added_fix, max_address);
17596 assign_minipool_offsets (last_barrier);
17598 while (ftmp)
17600 if (!BARRIER_P (ftmp->insn)
17601 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17602 == NULL))
17603 break;
17605 ftmp = ftmp->next;
17608 /* Scan over the fixes we have identified for this pool, fixing them
17609 up and adding the constants to the pool itself. */
17610 for (this_fix = fix; this_fix && ftmp != this_fix;
17611 this_fix = this_fix->next)
17612 if (!BARRIER_P (this_fix->insn))
17614 rtx addr
17615 = plus_constant (Pmode,
17616 gen_rtx_LABEL_REF (VOIDmode,
17617 minipool_vector_label),
17618 this_fix->minipool->offset);
17619 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17622 dump_minipool (last_barrier->insn);
17623 fix = ftmp;
17626 /* From now on we must synthesize any constants that we can't handle
17627 directly. This can happen if the RTL gets split during final
17628 instruction generation. */
17629 cfun->machine->after_arm_reorg = 1;
17631 /* Free the minipool memory. */
17632 obstack_free (&minipool_obstack, minipool_startobj);
17635 /* Routines to output assembly language. */
17637 /* Return string representation of passed in real value. */
17638 static const char *
17639 fp_const_from_val (REAL_VALUE_TYPE *r)
17641 if (!fp_consts_inited)
17642 init_fp_table ();
17644 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17645 return "0";
17648 /* OPERANDS[0] is the entire list of insns that constitute pop,
17649 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17650 is in the list, UPDATE is true iff the list contains explicit
17651 update of base register. */
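/* For illustration (the register choices are invented), the strings
   assembled below look like:

       pop     {r4, r5, pc}        @ base is SP, unified syntax
       ldmfd   sp!, {r4, r5, pc}   @ base is SP, divided syntax
       ldmia   r7, {r4, r5}        @ other base register, no writeback

   with a trailing "^" appended when returning from an interrupt
   handler.  */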
17652 void
17653 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17654 bool update)
17656 int i;
17657 char pattern[100];
17658 int offset;
17659 const char *conditional;
17660 int num_saves = XVECLEN (operands[0], 0);
17661 unsigned int regno;
17662 unsigned int regno_base = REGNO (operands[1]);
17664 offset = 0;
17665 offset += update ? 1 : 0;
17666 offset += return_pc ? 1 : 0;
17668 /* Is the base register in the list? */
17669 for (i = offset; i < num_saves; i++)
17671 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17672 /* If SP is in the list, then the base register must be SP. */
17673 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17674 /* If base register is in the list, there must be no explicit update. */
17675 if (regno == regno_base)
17676 gcc_assert (!update);
17679 conditional = reverse ? "%?%D0" : "%?%d0";
17680 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17682 /* Output pop (not ldmfd) because it has a shorter encoding. */
17683 gcc_assert (update);
17684 sprintf (pattern, "pop%s\t{", conditional);
17686 else
17688 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17689 It's just a convention, their semantics are identical. */
17690 if (regno_base == SP_REGNUM)
17691 sprintf (pattern, "ldm%sfd\t", conditional);
17692 else if (TARGET_UNIFIED_ASM)
17693 sprintf (pattern, "ldmia%s\t", conditional);
17694 else
17695 sprintf (pattern, "ldm%sia\t", conditional);
17697 strcat (pattern, reg_names[regno_base]);
17698 if (update)
17699 strcat (pattern, "!, {");
17700 else
17701 strcat (pattern, ", {");
17704 /* Output the first destination register. */
17705 strcat (pattern,
17706 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17708 /* Output the rest of the destination registers. */
17709 for (i = offset + 1; i < num_saves; i++)
17711 strcat (pattern, ", ");
17712 strcat (pattern,
17713 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17716 strcat (pattern, "}");
17718 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17719 strcat (pattern, "^");
17721 output_asm_insn (pattern, &cond);
17725 /* Output the assembly for a store multiple. */
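/* A hypothetical example of the string built below: storing three double
   registers starting at d8 produces either

       vpush.64   {d8, d9, d10}         @ the address register is SP
       vstmdb.64  r4!, {d8, d9, d10}    @ any other base register

   (the %?, %P1 and %m0 escapes are expanded later by output_asm_insn).  */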
17727 const char *
17728 vfp_output_vstmd (rtx * operands)
17730 char pattern[100];
17731 int p;
17732 int base;
17733 int i;
17734 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17735 ? XEXP (operands[0], 0)
17736 : XEXP (XEXP (operands[0], 0), 0);
17737 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17739 if (push_p)
17740 strcpy (pattern, "vpush%?.64\t{%P1");
17741 else
17742 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17744 p = strlen (pattern);
17746 gcc_assert (REG_P (operands[1]));
17748 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17749 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17751 p += sprintf (&pattern[p], ", d%d", base + i);
17753 strcpy (&pattern[p], "}");
17755 output_asm_insn (pattern, operands);
17756 return "";
17760 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17761 number of bytes pushed. */
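/* A worked example under the rules below (the register choices are
   illustrative): saving d8-d10 builds one multiple-store of three
   doubleword registers, 3 * 8 = 24 bytes, and the function returns 24.
   A request for exactly two pairs on a core without arm_arch6 is widened
   to three pairs (24 bytes instead of 16) to avoid the ARM10 VFPr1
   erratum described in the first comment of the function.  */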
17763 static int
17764 vfp_emit_fstmd (int base_reg, int count)
17766 rtx par;
17767 rtx dwarf;
17768 rtx tmp, reg;
17769 int i;
17771 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17772 register pairs are stored by a store multiple insn. We avoid this
17773 by pushing an extra pair. */
17774 if (count == 2 && !arm_arch6)
17776 if (base_reg == LAST_VFP_REGNUM - 3)
17777 base_reg -= 2;
17778 count++;
17781 /* FSTMD may not store more than 16 doubleword registers at once. Split
17782 larger stores into multiple parts (up to a maximum of two, in
17783 practice). */
17784 if (count > 16)
17786 int saved;
17787 /* NOTE: base_reg is an internal register number, so each D register
17788 counts as 2. */
17789 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17790 saved += vfp_emit_fstmd (base_reg, 16);
17791 return saved;
17794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17795 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17797 reg = gen_rtx_REG (DFmode, base_reg);
17798 base_reg += 2;
17800 XVECEXP (par, 0, 0)
17801 = gen_rtx_SET (VOIDmode,
17802 gen_frame_mem
17803 (BLKmode,
17804 gen_rtx_PRE_MODIFY (Pmode,
17805 stack_pointer_rtx,
17806 plus_constant
17807 (Pmode, stack_pointer_rtx,
17808 - (count * 8)))
17810 gen_rtx_UNSPEC (BLKmode,
17811 gen_rtvec (1, reg),
17812 UNSPEC_PUSH_MULT));
17814 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17815 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17816 RTX_FRAME_RELATED_P (tmp) = 1;
17817 XVECEXP (dwarf, 0, 0) = tmp;
17819 tmp = gen_rtx_SET (VOIDmode,
17820 gen_frame_mem (DFmode, stack_pointer_rtx),
17821 reg);
17822 RTX_FRAME_RELATED_P (tmp) = 1;
17823 XVECEXP (dwarf, 0, 1) = tmp;
17825 for (i = 1; i < count; i++)
17827 reg = gen_rtx_REG (DFmode, base_reg);
17828 base_reg += 2;
17829 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17831 tmp = gen_rtx_SET (VOIDmode,
17832 gen_frame_mem (DFmode,
17833 plus_constant (Pmode,
17834 stack_pointer_rtx,
17835 i * 8)),
17836 reg);
17837 RTX_FRAME_RELATED_P (tmp) = 1;
17838 XVECEXP (dwarf, 0, i + 1) = tmp;
17841 par = emit_insn (par);
17842 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17843 RTX_FRAME_RELATED_P (par) = 1;
17845 return count * 8;
17848 /* Emit a call instruction with pattern PAT. ADDR is the address of
17849 the call target. */
17851 void
17852 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17854 rtx insn;
17856 insn = emit_call_insn (pat);
17858 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17859 If the call might use such an entry, add a use of the PIC register
17860 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17861 if (TARGET_VXWORKS_RTP
17862 && flag_pic
17863 && !sibcall
17864 && GET_CODE (addr) == SYMBOL_REF
17865 && (SYMBOL_REF_DECL (addr)
17866 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17867 : !SYMBOL_REF_LOCAL_P (addr)))
17869 require_pic_register ();
17870 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17873 if (TARGET_AAPCS_BASED)
17875 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17876 linker. We need to add an IP clobber to allow setting
17877 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17878 is not needed since it's a fixed register. */
17879 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17880 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17884 /* Output a 'call' insn. */
17885 const char *
17886 output_call (rtx *operands)
17888 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17890 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17891 if (REGNO (operands[0]) == LR_REGNUM)
17893 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17894 output_asm_insn ("mov%?\t%0, %|lr", operands);
17897 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17899 if (TARGET_INTERWORK || arm_arch4t)
17900 output_asm_insn ("bx%?\t%0", operands);
17901 else
17902 output_asm_insn ("mov%?\t%|pc, %0", operands);
17904 return "";
17907 /* Output a 'call' insn that is a reference in memory. This is
17908 disabled for ARMv5 and we prefer a blx instead because otherwise
17909 there's a significant performance overhead. */
17910 const char *
17911 output_call_mem (rtx *operands)
17913 gcc_assert (!arm_arch5);
17914 if (TARGET_INTERWORK)
17916 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17917 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17918 output_asm_insn ("bx%?\t%|ip", operands);
17920 else if (regno_use_in (LR_REGNUM, operands[0]))
17922 /* LR is used in the memory address. We load the address in the
17923 first instruction. It's safe to use IP as the target of the
17924 load since the call will kill it anyway. */
17925 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17926 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17927 if (arm_arch4t)
17928 output_asm_insn ("bx%?\t%|ip", operands);
17929 else
17930 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17932 else
17934 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17935 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17938 return "";
17942 /* Output a move from arm registers to arm registers of a long double
17943 OPERANDS[0] is the destination.
17944 OPERANDS[1] is the source. */
17945 const char *
17946 output_mov_long_double_arm_from_arm (rtx *operands)
17948 /* We have to be careful here because the two might overlap. */
17949 int dest_start = REGNO (operands[0]);
17950 int src_start = REGNO (operands[1]);
17951 rtx ops[2];
17952 int i;
17954 if (dest_start < src_start)
17956 for (i = 0; i < 3; i++)
17958 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17959 ops[1] = gen_rtx_REG (SImode, src_start + i);
17960 output_asm_insn ("mov%?\t%0, %1", ops);
17963 else
17965 for (i = 2; i >= 0; i--)
17967 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17968 ops[1] = gen_rtx_REG (SImode, src_start + i);
17969 output_asm_insn ("mov%?\t%0, %1", ops);
17973 return "";
17976 void
17977 arm_emit_movpair (rtx dest, rtx src)
17979 /* If the src is an immediate, simplify it. */
17980 if (CONST_INT_P (src))
17982 HOST_WIDE_INT val = INTVAL (src);
17983 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17984 if ((val >> 16) & 0x0000ffff)
17985 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17986 GEN_INT (16)),
17987 GEN_INT ((val >> 16) & 0x0000ffff));
17988 return;
17990 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17991 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17994 /* Output a move between double words. It must be REG<-MEM
17995 or MEM<-REG. */
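/* Typical expansions of the templates below, with invented registers:

       ldrd    r0, [r2]           @ REG<-MEM when LDRD is available
       ldmia   r2, {r0, r1}       @ REG<-MEM otherwise
       strd    r0, [r2]           @ MEM<-REG when LDRD/STRD is available
       stmia   r2, {r0, r1}       @ MEM<-REG otherwise

   The many special cases handled below deal with auto-increment
   addresses, overlap between base and destination registers, and offsets
   outside the range LDRD/STRD can encode.  */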
17996 const char *
17997 output_move_double (rtx *operands, bool emit, int *count)
17999 enum rtx_code code0 = GET_CODE (operands[0]);
18000 enum rtx_code code1 = GET_CODE (operands[1]);
18001 rtx otherops[3];
18002 if (count)
18003 *count = 1;
18005 /* The only case when this might happen is when
18006 you are looking at the length of a DImode instruction
18007 that has an invalid constant in it. */
18008 if (code0 == REG && code1 != MEM)
18010 gcc_assert (!emit);
18011 *count = 2;
18012 return "";
18015 if (code0 == REG)
18017 unsigned int reg0 = REGNO (operands[0]);
18019 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18021 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18023 switch (GET_CODE (XEXP (operands[1], 0)))
18025 case REG:
18027 if (emit)
18029 if (TARGET_LDRD
18030 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18031 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
18032 else
18033 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18035 break;
18037 case PRE_INC:
18038 gcc_assert (TARGET_LDRD);
18039 if (emit)
18040 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
18041 break;
18043 case PRE_DEC:
18044 if (emit)
18046 if (TARGET_LDRD)
18047 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
18048 else
18049 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
18051 break;
18053 case POST_INC:
18054 if (emit)
18056 if (TARGET_LDRD)
18057 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
18058 else
18059 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
18061 break;
18063 case POST_DEC:
18064 gcc_assert (TARGET_LDRD);
18065 if (emit)
18066 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
18067 break;
18069 case PRE_MODIFY:
18070 case POST_MODIFY:
18071 /* Autoincrement addressing modes should never have overlapping
18072 base and destination registers, and overlapping index registers
18073 are already prohibited, so this doesn't need to worry about
18074 fix_cm3_ldrd. */
18075 otherops[0] = operands[0];
18076 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18077 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18079 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18081 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18083 /* Registers overlap so split out the increment. */
18084 if (emit)
18086 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18087 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
18089 if (count)
18090 *count = 2;
18092 else
18094 /* Use a single insn if we can.
18095 FIXME: IWMMXT allows offsets larger than ldrd can
18096 handle, fix these up with a pair of ldr. */
18097 if (TARGET_THUMB2
18098 || !CONST_INT_P (otherops[2])
18099 || (INTVAL (otherops[2]) > -256
18100 && INTVAL (otherops[2]) < 256))
18102 if (emit)
18103 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18105 else
18107 if (emit)
18109 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18110 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18112 if (count)
18113 *count = 2;
18118 else
18120 /* Use a single insn if we can.
18121 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18122 fix these up with a pair of ldr. */
18123 if (TARGET_THUMB2
18124 || !CONST_INT_P (otherops[2])
18125 || (INTVAL (otherops[2]) > -256
18126 && INTVAL (otherops[2]) < 256))
18128 if (emit)
18129 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18131 else
18133 if (emit)
18135 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18136 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18138 if (count)
18139 *count = 2;
18142 break;
18144 case LABEL_REF:
18145 case CONST:
18146 /* We might be able to use ldrd %0, %1 here. However the range is
18147 different to ldr/adr, and it is broken on some ARMv7-M
18148 implementations. */
18149 /* Use the second register of the pair to avoid problematic
18150 overlap. */
18151 otherops[1] = operands[1];
18152 if (emit)
18153 output_asm_insn ("adr%?\t%0, %1", otherops);
18154 operands[1] = otherops[0];
18155 if (emit)
18157 if (TARGET_LDRD)
18158 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18159 else
18160 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18163 if (count)
18164 *count = 2;
18165 break;
18167 /* ??? This needs checking for thumb2. */
18168 default:
18169 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18170 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18172 otherops[0] = operands[0];
18173 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18174 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18176 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18178 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18180 switch ((int) INTVAL (otherops[2]))
18182 case -8:
18183 if (emit)
18184 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18185 return "";
18186 case -4:
18187 if (TARGET_THUMB2)
18188 break;
18189 if (emit)
18190 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18191 return "";
18192 case 4:
18193 if (TARGET_THUMB2)
18194 break;
18195 if (emit)
18196 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18197 return "";
18200 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18201 operands[1] = otherops[0];
18202 if (TARGET_LDRD
18203 && (REG_P (otherops[2])
18204 || TARGET_THUMB2
18205 || (CONST_INT_P (otherops[2])
18206 && INTVAL (otherops[2]) > -256
18207 && INTVAL (otherops[2]) < 256)))
18209 if (reg_overlap_mentioned_p (operands[0],
18210 otherops[2]))
18212 /* Swap base and index registers over to
18213 avoid a conflict. */
18214 std::swap (otherops[1], otherops[2]);
18216 /* If both registers conflict, it will usually
18217 have been fixed by a splitter. */
18218 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18219 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18221 if (emit)
18223 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18224 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18226 if (count)
18227 *count = 2;
18229 else
18231 otherops[0] = operands[0];
18232 if (emit)
18233 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18235 return "";
18238 if (CONST_INT_P (otherops[2]))
18240 if (emit)
18242 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18243 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18244 else
18245 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18248 else
18250 if (emit)
18251 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18254 else
18256 if (emit)
18257 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18260 if (count)
18261 *count = 2;
18263 if (TARGET_LDRD)
18264 return "ldr%(d%)\t%0, [%1]";
18266 return "ldm%(ia%)\t%1, %M0";
18268 else
18270 otherops[1] = adjust_address (operands[1], SImode, 4);
18271 /* Take care of overlapping base/data reg. */
18272 if (reg_mentioned_p (operands[0], operands[1]))
18274 if (emit)
18276 output_asm_insn ("ldr%?\t%0, %1", otherops);
18277 output_asm_insn ("ldr%?\t%0, %1", operands);
18279 if (count)
18280 *count = 2;
18283 else
18285 if (emit)
18287 output_asm_insn ("ldr%?\t%0, %1", operands);
18288 output_asm_insn ("ldr%?\t%0, %1", otherops);
18290 if (count)
18291 *count = 2;
18296 else
18298 /* Constraints should ensure this. */
18299 gcc_assert (code0 == MEM && code1 == REG);
18300 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18301 || (TARGET_ARM && TARGET_LDRD));
18303 switch (GET_CODE (XEXP (operands[0], 0)))
18305 case REG:
18306 if (emit)
18308 if (TARGET_LDRD)
18309 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18310 else
18311 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18313 break;
18315 case PRE_INC:
18316 gcc_assert (TARGET_LDRD);
18317 if (emit)
18318 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18319 break;
18321 case PRE_DEC:
18322 if (emit)
18324 if (TARGET_LDRD)
18325 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18326 else
18327 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18329 break;
18331 case POST_INC:
18332 if (emit)
18334 if (TARGET_LDRD)
18335 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18336 else
18337 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18339 break;
18341 case POST_DEC:
18342 gcc_assert (TARGET_LDRD);
18343 if (emit)
18344 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18345 break;
18347 case PRE_MODIFY:
18348 case POST_MODIFY:
18349 otherops[0] = operands[1];
18350 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18351 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18353 /* IWMMXT allows offsets larger than ldrd can handle,
18354 fix these up with a pair of ldr. */
18355 if (!TARGET_THUMB2
18356 && CONST_INT_P (otherops[2])
18357 && (INTVAL(otherops[2]) <= -256
18358 || INTVAL(otherops[2]) >= 256))
18360 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18362 if (emit)
18364 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18365 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18367 if (count)
18368 *count = 2;
18370 else
18372 if (emit)
18374 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18375 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18377 if (count)
18378 *count = 2;
18381 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18383 if (emit)
18384 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18386 else
18388 if (emit)
18389 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18391 break;
18393 case PLUS:
18394 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18395 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18397 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18399 case -8:
18400 if (emit)
18401 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18402 return "";
18404 case -4:
18405 if (TARGET_THUMB2)
18406 break;
18407 if (emit)
18408 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18409 return "";
18411 case 4:
18412 if (TARGET_THUMB2)
18413 break;
18414 if (emit)
18415 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18416 return "";
18419 if (TARGET_LDRD
18420 && (REG_P (otherops[2])
18421 || TARGET_THUMB2
18422 || (CONST_INT_P (otherops[2])
18423 && INTVAL (otherops[2]) > -256
18424 && INTVAL (otherops[2]) < 256)))
18426 otherops[0] = operands[1];
18427 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18428 if (emit)
18429 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18430 return "";
18432 /* Fall through */
18434 default:
18435 otherops[0] = adjust_address (operands[0], SImode, 4);
18436 otherops[1] = operands[1];
18437 if (emit)
18439 output_asm_insn ("str%?\t%1, %0", operands);
18440 output_asm_insn ("str%?\t%H1, %0", otherops);
18442 if (count)
18443 *count = 2;
18447 return "";
18450 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18451 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18453 const char *
18454 output_move_quad (rtx *operands)
18456 if (REG_P (operands[0]))
18458 /* Load, or reg->reg move. */
18460 if (MEM_P (operands[1]))
18462 switch (GET_CODE (XEXP (operands[1], 0)))
18464 case REG:
18465 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18466 break;
18468 case LABEL_REF:
18469 case CONST:
18470 output_asm_insn ("adr%?\t%0, %1", operands);
18471 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18472 break;
18474 default:
18475 gcc_unreachable ();
18478 else
18480 rtx ops[2];
18481 int dest, src, i;
18483 gcc_assert (REG_P (operands[1]));
18485 dest = REGNO (operands[0]);
18486 src = REGNO (operands[1]);
18488 /* This seems pretty dumb, but hopefully GCC won't try to do it
18489 very often. */
18490 if (dest < src)
18491 for (i = 0; i < 4; i++)
18493 ops[0] = gen_rtx_REG (SImode, dest + i);
18494 ops[1] = gen_rtx_REG (SImode, src + i);
18495 output_asm_insn ("mov%?\t%0, %1", ops);
18497 else
18498 for (i = 3; i >= 0; i--)
18500 ops[0] = gen_rtx_REG (SImode, dest + i);
18501 ops[1] = gen_rtx_REG (SImode, src + i);
18502 output_asm_insn ("mov%?\t%0, %1", ops);
18506 else
18508 gcc_assert (MEM_P (operands[0]));
18509 gcc_assert (REG_P (operands[1]));
18510 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18512 switch (GET_CODE (XEXP (operands[0], 0)))
18514 case REG:
18515 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18516 break;
18518 default:
18519 gcc_unreachable ();
18523 return "";
18526 /* Output a VFP load or store instruction. */
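/* Rough examples of what the format string below expands to (registers
   and addresses are invented):

       vldr.64    d8, [r0]        @ plain load of a DFmode value
       vstr.32    s0, [r0]        @ plain store of an SFmode value
       vldmia.64  r0!, {d8}       @ POST_INC address
       vstmdb.64  r0!, {d8}       @ PRE_DEC address

   The "ld"/"st", "64"/"32" and "P" pieces are filled in by the sprintf
   near the end of the function.  */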
18528 const char *
18529 output_move_vfp (rtx *operands)
18531 rtx reg, mem, addr, ops[2];
18532 int load = REG_P (operands[0]);
18533 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18534 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18535 const char *templ;
18536 char buff[50];
18537 machine_mode mode;
18539 reg = operands[!load];
18540 mem = operands[load];
18542 mode = GET_MODE (reg);
18544 gcc_assert (REG_P (reg));
18545 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18546 gcc_assert (mode == SFmode
18547 || mode == DFmode
18548 || mode == SImode
18549 || mode == DImode
18550 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18551 gcc_assert (MEM_P (mem));
18553 addr = XEXP (mem, 0);
18555 switch (GET_CODE (addr))
18557 case PRE_DEC:
18558 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18559 ops[0] = XEXP (addr, 0);
18560 ops[1] = reg;
18561 break;
18563 case POST_INC:
18564 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18565 ops[0] = XEXP (addr, 0);
18566 ops[1] = reg;
18567 break;
18569 default:
18570 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18571 ops[0] = reg;
18572 ops[1] = mem;
18573 break;
18576 sprintf (buff, templ,
18577 load ? "ld" : "st",
18578 dp ? "64" : "32",
18579 dp ? "P" : "",
18580 integer_p ? "\t%@ int" : "");
18581 output_asm_insn (buff, ops);
18583 return "";
18586 /* Output a Neon double-word or quad-word load or store, or a load
18587 or store for larger structure modes.
18589 WARNING: The ordering of elements is weird in big-endian mode,
18590 because the EABI requires that vectors stored in memory appear
18591 as though they were stored by a VSTM instruction.
18592 GCC RTL defines element ordering based on in-memory order.
18593 This can be different from the architectural ordering of elements
18594 within a NEON register. The intrinsics defined in arm_neon.h use the
18595 NEON register element ordering, not the GCC RTL element ordering.
18597 For example, the in-memory ordering of a big-endian quadword
18598 vector with 16-bit elements when stored from register pair {d0,d1}
18599 will be (lowest address first, d0[N] is NEON register element N):
18601 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18603 When necessary, quadword registers (dN, dN+1) are moved to ARM
18604 registers from rN in the order:
18606 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18608 So that STM/LDM can be used on vectors in ARM registers, and the
18609 same memory layout will result as if VSTM/VLDM were used.
18611 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18612 possible, which allows use of appropriate alignment tags.
18613 Note that the choice of "64" is independent of the actual vector
18614 element size; this size simply ensures that the behavior is
18615 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18617 Due to limitations of those instructions, use of VST1.64/VLD1.64
18618 is not possible if:
18619 - the address contains PRE_DEC, or
18620 - the mode refers to more than 4 double-word registers
18622 In those cases, it would be possible to replace VSTM/VLDM by a
18623 sequence of instructions; this is not currently implemented since
18624 this is not certain to actually improve performance. */
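/* A small invented example of the preference described above: a quadword
   value in (d0, d1) stored through a pointer known to be 128-bit aligned
   can carry an alignment hint,

       vst1.64   {d0, d1}, [r0:128]

   whereas the VSTM fallback required for PRE_DEC addresses or for modes
   wider than four doubleword registers cannot:

       vstmdb    r0!, {d0, d1}  */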
18626 const char *
18627 output_move_neon (rtx *operands)
18629 rtx reg, mem, addr, ops[2];
18630 int regno, nregs, load = REG_P (operands[0]);
18631 const char *templ;
18632 char buff[50];
18633 machine_mode mode;
18635 reg = operands[!load];
18636 mem = operands[load];
18638 mode = GET_MODE (reg);
18640 gcc_assert (REG_P (reg));
18641 regno = REGNO (reg);
18642 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18643 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18644 || NEON_REGNO_OK_FOR_QUAD (regno));
18645 gcc_assert (VALID_NEON_DREG_MODE (mode)
18646 || VALID_NEON_QREG_MODE (mode)
18647 || VALID_NEON_STRUCT_MODE (mode));
18648 gcc_assert (MEM_P (mem));
18650 addr = XEXP (mem, 0);
18652 /* Strip off const from addresses like (const (plus (...))). */
18653 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18654 addr = XEXP (addr, 0);
18656 switch (GET_CODE (addr))
18658 case POST_INC:
18659 /* We have to use vldm / vstm for too-large modes. */
18660 if (nregs > 4)
18662 templ = "v%smia%%?\t%%0!, %%h1";
18663 ops[0] = XEXP (addr, 0);
18665 else
18667 templ = "v%s1.64\t%%h1, %%A0";
18668 ops[0] = mem;
18670 ops[1] = reg;
18671 break;
18673 case PRE_DEC:
18674 /* We have to use vldm / vstm in this case, since there is no
18675 pre-decrement form of the vld1 / vst1 instructions. */
18676 templ = "v%smdb%%?\t%%0!, %%h1";
18677 ops[0] = XEXP (addr, 0);
18678 ops[1] = reg;
18679 break;
18681 case POST_MODIFY:
18682 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18683 gcc_unreachable ();
18685 case REG:
18686 /* We have to use vldm / vstm for too-large modes. */
18687 if (nregs > 1)
18689 if (nregs > 4)
18690 templ = "v%smia%%?\t%%m0, %%h1";
18691 else
18692 templ = "v%s1.64\t%%h1, %%A0";
18694 ops[0] = mem;
18695 ops[1] = reg;
18696 break;
18698 /* Fall through. */
18699 case LABEL_REF:
18700 case PLUS:
18702 int i;
18703 int overlap = -1;
18704 for (i = 0; i < nregs; i++)
18706 /* We're only using DImode here because it's a convenient size. */
18707 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18708 ops[1] = adjust_address (mem, DImode, 8 * i);
18709 if (reg_overlap_mentioned_p (ops[0], mem))
18711 gcc_assert (overlap == -1);
18712 overlap = i;
18714 else
18716 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18717 output_asm_insn (buff, ops);
18720 if (overlap != -1)
18722 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18723 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18724 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18725 output_asm_insn (buff, ops);
18728 return "";
18731 default:
18732 gcc_unreachable ();
18735 sprintf (buff, templ, load ? "ld" : "st");
18736 output_asm_insn (buff, ops);
18738 return "";
18741 /* Compute and return the length of neon_mov<mode>, where <mode> is
18742 one of VSTRUCT modes: EI, OI, CI or XI. */
18744 arm_attr_length_move_neon (rtx_insn *insn)
18746 rtx reg, mem, addr;
18747 int load;
18748 machine_mode mode;
18750 extract_insn_cached (insn);
18752 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18754 mode = GET_MODE (recog_data.operand[0]);
18755 switch (mode)
18757 case EImode:
18758 case OImode:
18759 return 8;
18760 case CImode:
18761 return 12;
18762 case XImode:
18763 return 16;
18764 default:
18765 gcc_unreachable ();
18769 load = REG_P (recog_data.operand[0]);
18770 reg = recog_data.operand[!load];
18771 mem = recog_data.operand[load];
18773 gcc_assert (MEM_P (mem));
18775 mode = GET_MODE (reg);
18776 addr = XEXP (mem, 0);
18778 /* Strip off const from addresses like (const (plus (...))). */
18779 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18780 addr = XEXP (addr, 0);
18782 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18784 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18785 return insns * 4;
18787 else
18788 return 4;
18791 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18792 return zero. */
18795 arm_address_offset_is_imm (rtx_insn *insn)
18797 rtx mem, addr;
18799 extract_insn_cached (insn);
18801 if (REG_P (recog_data.operand[0]))
18802 return 0;
18804 mem = recog_data.operand[0];
18806 gcc_assert (MEM_P (mem));
18808 addr = XEXP (mem, 0);
18810 if (REG_P (addr)
18811 || (GET_CODE (addr) == PLUS
18812 && REG_P (XEXP (addr, 0))
18813 && CONST_INT_P (XEXP (addr, 1))))
18814 return 1;
18815 else
18816 return 0;
18819 /* Output an ADD r, s, #n where n may be too big for one instruction.
18820 If adding zero to one register, output nothing. */
18821 const char *
18822 output_add_immediate (rtx *operands)
18824 HOST_WIDE_INT n = INTVAL (operands[2]);
18826 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18828 if (n < 0)
18829 output_multi_immediate (operands,
18830 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18831 -n);
18832 else
18833 output_multi_immediate (operands,
18834 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18838 return "";
18841 /* Output a multiple immediate operation.
18842 OPERANDS is the vector of operands referred to in the output patterns.
18843 INSTR1 is the output pattern to use for the first constant.
18844 INSTR2 is the output pattern to use for subsequent constants.
18845 IMMED_OP is the index of the constant slot in OPERANDS.
18846 N is the constant value. */
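/* Worked example (the operands are invented): when called from
   output_add_immediate with N = 0x10004, the scan below finds the 8-bit
   chunks 0x4 and 0x10000 and emits

       add     r0, r1, #4
       add     r0, r0, #65536

   i.e. the first chunk uses INSTR1 and each later chunk uses INSTR2.  */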
18847 static const char *
18848 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18849 int immed_op, HOST_WIDE_INT n)
18851 #if HOST_BITS_PER_WIDE_INT > 32
18852 n &= 0xffffffff;
18853 #endif
18855 if (n == 0)
18857 /* Quick and easy output. */
18858 operands[immed_op] = const0_rtx;
18859 output_asm_insn (instr1, operands);
18861 else
18863 int i;
18864 const char * instr = instr1;
18866 /* Note that n is never zero here (which would give no output). */
18867 for (i = 0; i < 32; i += 2)
18869 if (n & (3 << i))
18871 operands[immed_op] = GEN_INT (n & (255 << i));
18872 output_asm_insn (instr, operands);
18873 instr = instr2;
18874 i += 6;
18879 return "";
18882 /* Return the name of a shifter operation. */
18883 static const char *
18884 arm_shift_nmem(enum rtx_code code)
18886 switch (code)
18888 case ASHIFT:
18889 return ARM_LSL_NAME;
18891 case ASHIFTRT:
18892 return "asr";
18894 case LSHIFTRT:
18895 return "lsr";
18897 case ROTATERT:
18898 return "ror";
18900 default:
18901 abort();
18905 /* Return the appropriate ARM instruction for the operation code.
18906 The returned result should not be overwritten. OP is the rtx of the
18907 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18908 was shifted. */
18909 const char *
18910 arithmetic_instr (rtx op, int shift_first_arg)
18912 switch (GET_CODE (op))
18914 case PLUS:
18915 return "add";
18917 case MINUS:
18918 return shift_first_arg ? "rsb" : "sub";
18920 case IOR:
18921 return "orr";
18923 case XOR:
18924 return "eor";
18926 case AND:
18927 return "and";
18929 case ASHIFT:
18930 case ASHIFTRT:
18931 case LSHIFTRT:
18932 case ROTATERT:
18933 return arm_shift_nmem(GET_CODE(op));
18935 default:
18936 gcc_unreachable ();
18940 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18941 for the operation code. The returned result should not be overwritten.
18942 OP is the rtx of the shift.
18943 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18944 constant shift amount otherwise. */
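/* Illustrative inputs and results for this helper (the rtxes are
   hypothetical): (ashift (reg) (const_int 5)) yields "lsl" with
   *AMOUNTP = 5; (mult (reg) (const_int 8)) is folded to "lsl" with
   *AMOUNTP = 3; a register-specified shift such as
   (lshiftrt (reg) (reg)) yields "lsr" with *AMOUNTP = -1.  */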
18945 static const char *
18946 shift_op (rtx op, HOST_WIDE_INT *amountp)
18948 const char * mnem;
18949 enum rtx_code code = GET_CODE (op);
18951 switch (code)
18953 case ROTATE:
18954 if (!CONST_INT_P (XEXP (op, 1)))
18956 output_operand_lossage ("invalid shift operand");
18957 return NULL;
18960 code = ROTATERT;
18961 *amountp = 32 - INTVAL (XEXP (op, 1));
18962 mnem = "ror";
18963 break;
18965 case ASHIFT:
18966 case ASHIFTRT:
18967 case LSHIFTRT:
18968 case ROTATERT:
18969 mnem = arm_shift_nmem(code);
18970 if (CONST_INT_P (XEXP (op, 1)))
18972 *amountp = INTVAL (XEXP (op, 1));
18974 else if (REG_P (XEXP (op, 1)))
18976 *amountp = -1;
18977 return mnem;
18979 else
18981 output_operand_lossage ("invalid shift operand");
18982 return NULL;
18984 break;
18986 case MULT:
18987 /* We never have to worry about the amount being other than a
18988 power of 2, since this case can never be reloaded from a reg. */
18989 if (!CONST_INT_P (XEXP (op, 1)))
18991 output_operand_lossage ("invalid shift operand");
18992 return NULL;
18995 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18997 /* Amount must be a power of two. */
18998 if (*amountp & (*amountp - 1))
19000 output_operand_lossage ("invalid shift operand");
19001 return NULL;
19004 *amountp = int_log2 (*amountp);
19005 return ARM_LSL_NAME;
19007 default:
19008 output_operand_lossage ("invalid shift operand");
19009 return NULL;
19012 /* This is not 100% correct, but follows from the desire to merge
19013 multiplication by a power of 2 with the recognizer for a
19014 shift. >=32 is not a valid shift for "lsl", so we must try and
19015 output a shift that produces the correct arithmetical result.
19016 Using lsr #32 is identical except for the fact that the carry bit
19017 is not set correctly if we set the flags; but we never use the
19018 carry bit from such an operation, so we can ignore that. */
19019 if (code == ROTATERT)
19020 /* Rotate is just modulo 32. */
19021 *amountp &= 31;
19022 else if (*amountp != (*amountp & 31))
19024 if (code == ASHIFT)
19025 mnem = "lsr";
19026 *amountp = 32;
19029 /* Shifts of 0 are no-ops. */
19030 if (*amountp == 0)
19031 return NULL;
19033 return mnem;
19036 /* Obtain the shift from the POWER of two. */
19038 static HOST_WIDE_INT
19039 int_log2 (HOST_WIDE_INT power)
19041 HOST_WIDE_INT shift = 0;
19043 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19045 gcc_assert (shift <= 31);
19046 shift++;
19049 return shift;
19052 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19053 because /bin/as is horribly restrictive. The judgement about
19054 whether or not each character is 'printable' (and can be output as
19055 is) or not (and must be printed with an octal escape) must be made
19056 with reference to the *host* character set -- the situation is
19057 similar to that discussed in the comments above pp_c_char in
19058 c-pretty-print.c. */
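/* For example (the input bytes are invented), the four bytes
   'H', 'i', '"', '\n' come out as

       .ascii  "Hi\"\012"

   printable characters pass through, '"' and '\' gain a leading
   backslash, anything else becomes a three-digit octal escape, and the
   directive is restarted whenever MAX_ASCII_LEN characters have been
   written on one line.  */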
19060 #define MAX_ASCII_LEN 51
19062 void
19063 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19065 int i;
19066 int len_so_far = 0;
19068 fputs ("\t.ascii\t\"", stream);
19070 for (i = 0; i < len; i++)
19072 int c = p[i];
19074 if (len_so_far >= MAX_ASCII_LEN)
19076 fputs ("\"\n\t.ascii\t\"", stream);
19077 len_so_far = 0;
19080 if (ISPRINT (c))
19082 if (c == '\\' || c == '\"')
19084 putc ('\\', stream);
19085 len_so_far++;
19087 putc (c, stream);
19088 len_so_far++;
19090 else
19092 fprintf (stream, "\\%03o", c);
19093 len_so_far += 4;
19097 fputs ("\"\n", stream);
19100 /* Whether a register is callee saved or not. This is necessary because high
19101 registers are marked as caller saved when optimizing for size on Thumb-1
19102 targets, despite being callee saved, in order to avoid using them. */
19103 #define callee_saved_reg_p(reg) \
19104 (!call_used_regs[reg] \
19105 || (TARGET_THUMB1 && optimize_size \
19106 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19108 /* Compute the register save mask for registers 0 through 12
19109 inclusive. This code is used by arm_compute_save_reg_mask. */
19111 static unsigned long
19112 arm_compute_save_reg0_reg12_mask (void)
19114 unsigned long func_type = arm_current_func_type ();
19115 unsigned long save_reg_mask = 0;
19116 unsigned int reg;
19118 if (IS_INTERRUPT (func_type))
19120 unsigned int max_reg;
19121 /* Interrupt functions must not corrupt any registers,
19122 even call clobbered ones. If this is a leaf function
19123 we can just examine the registers used by the RTL, but
19124 otherwise we have to assume that whatever function is
19125 called might clobber anything, and so we have to save
19126 all the call-clobbered registers as well. */
19127 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19128 /* FIQ handlers have registers r8 - r12 banked, so
19129 we only need to check r0 - r7.  Normal ISRs only
19130 bank r14 and r15, so we must check up to r12.
19131 r13 is the stack pointer which is always preserved,
19132 so we do not need to consider it here. */
19133 max_reg = 7;
19134 else
19135 max_reg = 12;
19137 for (reg = 0; reg <= max_reg; reg++)
19138 if (df_regs_ever_live_p (reg)
19139 || (! crtl->is_leaf && call_used_regs[reg]))
19140 save_reg_mask |= (1 << reg);
19142 /* Also save the pic base register if necessary. */
19143 if (flag_pic
19144 && !TARGET_SINGLE_PIC_BASE
19145 && arm_pic_register != INVALID_REGNUM
19146 && crtl->uses_pic_offset_table)
19147 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19149 else if (IS_VOLATILE(func_type))
19151 /* For noreturn functions we historically omitted register saves
19152 altogether. However this really messes up debugging. As a
19153 compromise save just the frame pointers. Combined with the link
19154 register saved elsewhere this should be sufficient to get
19155 a backtrace. */
19156 if (frame_pointer_needed)
19157 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19158 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19159 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19160 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19161 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19163 else
19165 /* In the normal case we only need to save those registers
19166 which are call saved and which are used by this function. */
19167 for (reg = 0; reg <= 11; reg++)
19168 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19169 save_reg_mask |= (1 << reg);
19171 /* Handle the frame pointer as a special case. */
19172 if (frame_pointer_needed)
19173 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19175 /* If we aren't loading the PIC register,
19176 don't stack it even though it may be live. */
19177 if (flag_pic
19178 && !TARGET_SINGLE_PIC_BASE
19179 && arm_pic_register != INVALID_REGNUM
19180 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19181 || crtl->uses_pic_offset_table))
19182 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19184 /* The prologue will copy SP into R0, so save it. */
19185 if (IS_STACKALIGN (func_type))
19186 save_reg_mask |= 1;
19189 /* Save registers so the exception handler can modify them. */
19190 if (crtl->calls_eh_return)
19192 unsigned int i;
19194 for (i = 0; ; i++)
19196 reg = EH_RETURN_DATA_REGNO (i);
19197 if (reg == INVALID_REGNUM)
19198 break;
19199 save_reg_mask |= 1 << reg;
19203 return save_reg_mask;
19206 /* Return true if r3 is live at the start of the function. */
19208 static bool
19209 arm_r3_live_at_start_p (void)
19211 /* Just look at cfg info, which is still close enough to correct at this
19212 point. This gives false positives for broken functions that might use
19213 uninitialized data that happens to be allocated in r3, but who cares? */
19214 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19217 /* Compute the number of bytes used to store the static chain register on the
19218 stack, above the stack frame. We need to know this accurately to get the
19219 alignment of the rest of the stack frame correct. */
19221 static int
19222 arm_compute_static_chain_stack_bytes (void)
19224 /* See the defining assertion in arm_expand_prologue. */
19225 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19226 && IS_NESTED (arm_current_func_type ())
19227 && arm_r3_live_at_start_p ()
19228 && crtl->args.pretend_args_size == 0)
19229 return 4;
19231 return 0;
19234 /* Compute a bit mask of which registers need to be
19235 saved on the stack for the current function.
19236 This is used by arm_get_frame_offsets, which may add extra registers. */
19238 static unsigned long
19239 arm_compute_save_reg_mask (void)
19241 unsigned int save_reg_mask = 0;
19242 unsigned long func_type = arm_current_func_type ();
19243 unsigned int reg;
19245 if (IS_NAKED (func_type))
19246 /* This should never really happen. */
19247 return 0;
19249 /* If we are creating a stack frame, then we must save the frame pointer,
19250 IP (which will hold the old stack pointer), LR and the PC. */
19251 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19252 save_reg_mask |=
19253 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19254 | (1 << IP_REGNUM)
19255 | (1 << LR_REGNUM)
19256 | (1 << PC_REGNUM);
19258 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19260 /* Decide if we need to save the link register.
19261 Interrupt routines have their own banked link register,
19262 so they never need to save it.
19263 Otherwise if we do not use the link register we do not need to save
19264 it. If we are pushing other registers onto the stack however, we
19265 can save an instruction in the epilogue by pushing the link register
19266 now and then popping it back into the PC. This incurs extra memory
19267 accesses though, so we only do it when optimizing for size, and only
19268 if we know that we will not need a fancy return sequence. */
19269 if (df_regs_ever_live_p (LR_REGNUM)
19270 || (save_reg_mask
19271 && optimize_size
19272 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19273 && !crtl->tail_call_emit
19274 && !crtl->calls_eh_return))
19275 save_reg_mask |= 1 << LR_REGNUM;
19277 if (cfun->machine->lr_save_eliminated)
19278 save_reg_mask &= ~ (1 << LR_REGNUM);
19280 if (TARGET_REALLY_IWMMXT
19281 && ((bit_count (save_reg_mask)
19282 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19283 arm_compute_static_chain_stack_bytes())
19284 ) % 2) != 0)
19286 /* The total number of registers that are going to be pushed
19287 onto the stack is odd. We need to ensure that the stack
19288 is 64-bit aligned before we start to save iWMMXt registers,
19289 and also before we start to create locals. (A local variable
19290 might be a double or long long which we will load/store using
19291 an iWMMXt instruction). Therefore we need to push another
19292 ARM register, so that the stack will be 64-bit aligned. We
19293 try to avoid using the arg registers (r0 - r3) as they might be
19294 used to pass values in a tail call. */
19295 for (reg = 4; reg <= 12; reg++)
19296 if ((save_reg_mask & (1 << reg)) == 0)
19297 break;
19299 if (reg <= 12)
19300 save_reg_mask |= (1 << reg);
19301 else
19303 cfun->machine->sibcall_blocked = 1;
19304 save_reg_mask |= (1 << 3);
19308 /* We may need to push an additional register for use initializing the
19309 PIC base register. */
19310 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19311 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19313 reg = thumb_find_work_register (1 << 4);
19314 if (!call_used_regs[reg])
19315 save_reg_mask |= (1 << reg);
19318 return save_reg_mask;
19322 /* Compute a bit mask of which registers need to be
19323 saved on the stack for the current function. */
19324 static unsigned long
19325 thumb1_compute_save_reg_mask (void)
19327 unsigned long mask;
19328 unsigned reg;
19330 mask = 0;
19331 for (reg = 0; reg < 12; reg ++)
19332 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19333 mask |= 1 << reg;
19335 if (flag_pic
19336 && !TARGET_SINGLE_PIC_BASE
19337 && arm_pic_register != INVALID_REGNUM
19338 && crtl->uses_pic_offset_table)
19339 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19341 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19342 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19343 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19345 /* LR will also be pushed if any lo regs are pushed. */
19346 if (mask & 0xff || thumb_force_lr_save ())
19347 mask |= (1 << LR_REGNUM);
19349 /* Make sure we have a low work register if we need one.
19350 We will need one if we are going to push a high register,
19351 but we are not currently intending to push a low register. */
19352 if ((mask & 0xff) == 0
19353 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19355 /* Use thumb_find_work_register to choose which register
19356 we will use. If the register is live then we will
19357 have to push it. Use LAST_LO_REGNUM as our fallback
19358 choice for the register to select. */
19359 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19360 /* Make sure the register returned by thumb_find_work_register is
19361 not part of the return value. */
19362 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19363 reg = LAST_LO_REGNUM;
19365 if (callee_saved_reg_p (reg))
19366 mask |= 1 << reg;
19369 /* The 504 below is 8 bytes less than 512 because there are two possible
19370 alignment words. We can't tell here if they will be present or not so we
19371 have to play it safe and assume that they are. */
19372 if ((CALLER_INTERWORKING_SLOT_SIZE +
19373 ROUND_UP_WORD (get_frame_size ()) +
19374 crtl->outgoing_args_size) >= 504)
19376 /* This is the same as the code in thumb1_expand_prologue() which
19377 determines which register to use for stack decrement. */
19378 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19379 if (mask & (1 << reg))
19380 break;
19382 if (reg > LAST_LO_REGNUM)
19384 /* Make sure we have a register available for stack decrement. */
19385 mask |= 1 << LAST_LO_REGNUM;
19389 return mask;
19393 /* Return the number of bytes required to save VFP registers. */
19394 static int
19395 arm_get_vfp_saved_size (void)
19397 unsigned int regno;
19398 int count;
19399 int saved;
19401 saved = 0;
19402 /* Space for saved VFP registers. */
19403 if (TARGET_HARD_FLOAT && TARGET_VFP)
19405 count = 0;
19406 for (regno = FIRST_VFP_REGNUM;
19407 regno < LAST_VFP_REGNUM;
19408 regno += 2)
19410 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19411 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19413 if (count > 0)
19415 /* Workaround ARM10 VFPr1 bug. */
19416 if (count == 2 && !arm_arch6)
19417 count++;
19418 saved += count * 8;
19420 count = 0;
19422 else
19423 count++;
19425 if (count > 0)
19427 if (count == 2 && !arm_arch6)
19428 count++;
19429 saved += count * 8;
19432 return saved;
19436 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19437 everything bar the final return instruction. If simple_return is true,
19438 then do not output epilogue, because it has already been emitted in RTL. */
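/* Examples of the sequences produced below (the saved registers are
   invented): a normal function that pushed r4, r5 and lr returns with

       ldmfd   sp!, {r4, r5, pc}     @ "pop {r4, r5, pc}" in unified syntax

   while an interrupt handler appends "^" to the register list so that
   the CPSR is restored too, or, when nothing was pushed, returns with
   "subs pc, lr, #4".  */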
19439 const char *
19440 output_return_instruction (rtx operand, bool really_return, bool reverse,
19441 bool simple_return)
19443 char conditional[10];
19444 char instr[100];
19445 unsigned reg;
19446 unsigned long live_regs_mask;
19447 unsigned long func_type;
19448 arm_stack_offsets *offsets;
19450 func_type = arm_current_func_type ();
19452 if (IS_NAKED (func_type))
19453 return "";
19455 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19457 /* If this function was declared non-returning, and we have
19458 found a tail call, then we have to trust that the called
19459 function won't return. */
19460 if (really_return)
19462 rtx ops[2];
19464 /* Otherwise, trap an attempted return by aborting. */
19465 ops[0] = operand;
19466 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19467 : "abort");
19468 assemble_external_libcall (ops[1]);
19469 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19472 return "";
19475 gcc_assert (!cfun->calls_alloca || really_return);
19477 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19479 cfun->machine->return_used_this_function = 1;
19481 offsets = arm_get_frame_offsets ();
19482 live_regs_mask = offsets->saved_regs_mask;
19484 if (!simple_return && live_regs_mask)
19486 const char * return_reg;
19488 /* If we do not have any special requirements for function exit
19489 (e.g. interworking) then we can load the return address
19490 directly into the PC. Otherwise we must load it into LR. */
19491 if (really_return
19492 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19493 return_reg = reg_names[PC_REGNUM];
19494 else
19495 return_reg = reg_names[LR_REGNUM];
19497 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19499 /* There are three possible reasons for the IP register
19500 being saved: 1) a stack frame was created, in which case
19501 IP contains the old stack pointer; 2) an ISR routine
19502 corrupted it; or 3) it was saved to align the stack on
19503 iWMMXt. In case 1, restore IP into SP; otherwise just
19504 restore IP. */
19505 if (frame_pointer_needed)
19507 live_regs_mask &= ~ (1 << IP_REGNUM);
19508 live_regs_mask |= (1 << SP_REGNUM);
19510 else
19511 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19514 /* On some ARM architectures it is faster to use LDR rather than
19515 LDM to load a single register. On other architectures, the
19516 cost is the same. In 26 bit mode, or for exception handlers,
19517 we have to use LDM to load the PC so that the CPSR is also
19518 restored. */
19519 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19520 if (live_regs_mask == (1U << reg))
19521 break;
19523 if (reg <= LAST_ARM_REGNUM
19524 && (reg != LR_REGNUM
19525 || ! really_return
19526 || ! IS_INTERRUPT (func_type)))
19528 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19529 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19531 else
19533 char *p;
19534 int first = 1;
19536 /* Generate the load multiple instruction to restore the
19537 registers. Note we can get here, even if
19538 frame_pointer_needed is true, but only if sp already
19539 points to the base of the saved core registers. */
19540 if (live_regs_mask & (1 << SP_REGNUM))
19542 unsigned HOST_WIDE_INT stack_adjust;
19544 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19545 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19547 if (stack_adjust && arm_arch5 && TARGET_ARM)
19548 if (TARGET_UNIFIED_ASM)
19549 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19550 else
19551 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19552 else
19554 /* If we can't use ldmib (SA110 bug),
19555 then try to pop r3 instead. */
19556 if (stack_adjust)
19557 live_regs_mask |= 1 << 3;
19559 if (TARGET_UNIFIED_ASM)
19560 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19561 else
19562 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19565 else
19566 if (TARGET_UNIFIED_ASM)
19567 sprintf (instr, "pop%s\t{", conditional);
19568 else
19569 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19571 p = instr + strlen (instr);
19573 for (reg = 0; reg <= SP_REGNUM; reg++)
19574 if (live_regs_mask & (1 << reg))
19576 int l = strlen (reg_names[reg]);
19578 if (first)
19579 first = 0;
19580 else
19582 memcpy (p, ", ", 2);
19583 p += 2;
19586 memcpy (p, "%|", 2);
19587 memcpy (p + 2, reg_names[reg], l);
19588 p += l + 2;
19591 if (live_regs_mask & (1 << LR_REGNUM))
19593 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19594 /* If returning from an interrupt, restore the CPSR. */
19595 if (IS_INTERRUPT (func_type))
19596 strcat (p, "^");
19598 else
19599 strcpy (p, "}");
19602 output_asm_insn (instr, & operand);
19604 /* See if we need to generate an extra instruction to
19605 perform the actual function return. */
19606 if (really_return
19607 && func_type != ARM_FT_INTERWORKED
19608 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19610 /* The return has already been handled
19611 by loading the LR into the PC. */
19612 return "";
19616 if (really_return)
19618 switch ((int) ARM_FUNC_TYPE (func_type))
19620 case ARM_FT_ISR:
19621 case ARM_FT_FIQ:
19622 /* ??? This is wrong for unified assembly syntax. */
19623 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19624 break;
19626 case ARM_FT_INTERWORKED:
19627 sprintf (instr, "bx%s\t%%|lr", conditional);
19628 break;
19630 case ARM_FT_EXCEPTION:
19631 /* ??? This is wrong for unified assembly syntax. */
19632 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19633 break;
19635 default:
19636 /* Use bx if it's available. */
19637 if (arm_arch5 || arm_arch4t)
19638 sprintf (instr, "bx%s\t%%|lr", conditional);
19639 else
19640 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19641 break;
19644 output_asm_insn (instr, & operand);
19647 return "";
19650 /* Write the function name into the code section, directly preceding
19651 the function prologue.
19653 Code will be output similar to this:
19655 .ascii "arm_poke_function_name", 0
19656 .align
19658 .word 0xff000000 + (t1 - t0)
19659 arm_poke_function_name
19660 mov ip, sp
19661 stmfd sp!, {fp, ip, lr, pc}
19662 sub fp, ip, #4
19664 When performing a stack backtrace, code can inspect the value
19665 of 'pc' stored at 'fp' + 0. If the trace function then looks
19666 at location pc - 12 and the top 8 bits are set, then we know
19667 that there is a function name embedded immediately preceding this
19668 location, and that its length is ((pc[-3]) & ~0xff000000).
19670 We assume that pc is declared as a pointer to an unsigned long.
19672 It is of no benefit to output the function name if we are assembling
19673 a leaf function. These function types will not contain a stack
19674 backtrace structure, therefore it is not possible to determine the
19675 function name. */
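/* A hypothetical consumer-side sketch (not part of GCC), restating the
   layout described above: given the saved pc as an unsigned long *,

     unsigned long marker = pc[-3];                 /* word at pc - 12 */
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & 0x00ffffff;   /* padded name length */
         const char *name = (const char *) pc - 12 - len;
       }

   The variable names here are illustrative only.  */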
19676 void
19677 arm_poke_function_name (FILE *stream, const char *name)
19679 unsigned long alignlength;
19680 unsigned long length;
19681 rtx x;
19683 length = strlen (name) + 1;
19684 alignlength = ROUND_UP_WORD (length);
19686 ASM_OUTPUT_ASCII (stream, name, length);
19687 ASM_OUTPUT_ALIGN (stream, 2);
19688 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19689 assemble_aligned_integer (UNITS_PER_WORD, x);
19692 /* Place some comments into the assembler stream
19693 describing the current function. */
19694 static void
19695 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19697 unsigned long func_type;
19699 /* ??? Do we want to print some of the below anyway? */
19700 if (TARGET_THUMB1)
19701 return;
19703 /* Sanity check. */
19704 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19706 func_type = arm_current_func_type ();
19708 switch ((int) ARM_FUNC_TYPE (func_type))
19710 default:
19711 case ARM_FT_NORMAL:
19712 break;
19713 case ARM_FT_INTERWORKED:
19714 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19715 break;
19716 case ARM_FT_ISR:
19717 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19718 break;
19719 case ARM_FT_FIQ:
19720 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19721 break;
19722 case ARM_FT_EXCEPTION:
19723 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19724 break;
19727 if (IS_NAKED (func_type))
19728 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19730 if (IS_VOLATILE (func_type))
19731 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19733 if (IS_NESTED (func_type))
19734 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19735 if (IS_STACKALIGN (func_type))
19736 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19738 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19739 crtl->args.size,
19740 crtl->args.pretend_args_size, frame_size);
19742 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19743 frame_pointer_needed,
19744 cfun->machine->uses_anonymous_args);
19746 if (cfun->machine->lr_save_eliminated)
19747 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19749 if (crtl->calls_eh_return)
19750 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19754 static void
19755 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19756 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19758 arm_stack_offsets *offsets;
19760 if (TARGET_THUMB1)
19762 int regno;
19764 /* Emit any call-via-reg trampolines that are needed for v4t support
19765 of call_reg and call_value_reg type insns. */
19766 for (regno = 0; regno < LR_REGNUM; regno++)
19768 rtx label = cfun->machine->call_via[regno];
19770 if (label != NULL)
19772 switch_to_section (function_section (current_function_decl));
19773 targetm.asm_out.internal_label (asm_out_file, "L",
19774 CODE_LABEL_NUMBER (label));
19775 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19779 /* ??? Probably not safe to set this here, since it assumes that a
19780 function will be emitted as assembly immediately after we generate
19781 RTL for it. This does not happen for inline functions. */
19782 cfun->machine->return_used_this_function = 0;
19784 else /* TARGET_32BIT */
19786 /* We need to take into account any stack-frame rounding. */
19787 offsets = arm_get_frame_offsets ();
19789 gcc_assert (!use_return_insn (FALSE, NULL)
19790 || (cfun->machine->return_used_this_function != 0)
19791 || offsets->saved_regs == offsets->outgoing_args
19792 || frame_pointer_needed);
19796 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19797 STR and STRD. If an even number of registers is being pushed, an
19798 STRD pattern is created for each register pair. If an
19799 odd number of registers is pushed, emit an initial STR followed by
19800 as many STRD instructions as are needed. This works best when the
19801 stack is initially 64-bit aligned (the normal case), since it
19802 ensures that each STRD is also 64-bit aligned. */
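/* Illustrative sketch of the emitted sequence (not literal output): for
   SAVED_REGS_MASK covering {r4, r5, r6} (an odd count) this produces
   roughly

     str   r4, [sp, #-12]!      @ single STR allocates all 12 bytes
     strd  r5, r6, [sp, #4]

   while {r4, r5, r6, r7} (an even count) becomes

     strd  r4, r5, [sp, #-16]!
     strd  r6, r7, [sp, #8]  */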
19803 static void
19804 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19806 int num_regs = 0;
19807 int i;
19808 int regno;
19809 rtx par = NULL_RTX;
19810 rtx dwarf = NULL_RTX;
19811 rtx tmp;
19812 bool first = true;
19814 num_regs = bit_count (saved_regs_mask);
19816 /* Must be at least one register to save, and can't save SP or PC. */
19817 gcc_assert (num_regs > 0 && num_regs <= 14);
19818 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19819 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19821 /* Create sequence for DWARF info. All the frame-related data for
19822 debugging is held in this wrapper. */
19823 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19825 /* Describe the stack adjustment. */
19826 tmp = gen_rtx_SET (VOIDmode,
19827 stack_pointer_rtx,
19828 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19829 RTX_FRAME_RELATED_P (tmp) = 1;
19830 XVECEXP (dwarf, 0, 0) = tmp;
19832 /* Find the first register. */
19833 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19836 i = 0;
19838 /* If there's an odd number of registers to push, start off by
19839 pushing a single register. This ensures that subsequent strd
19840 operations are dword aligned (assuming that SP was originally
19841 64-bit aligned). */
19842 if ((num_regs & 1) != 0)
19844 rtx reg, mem, insn;
19846 reg = gen_rtx_REG (SImode, regno);
19847 if (num_regs == 1)
19848 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19849 stack_pointer_rtx));
19850 else
19851 mem = gen_frame_mem (Pmode,
19852 gen_rtx_PRE_MODIFY
19853 (Pmode, stack_pointer_rtx,
19854 plus_constant (Pmode, stack_pointer_rtx,
19855 -4 * num_regs)));
19857 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19858 RTX_FRAME_RELATED_P (tmp) = 1;
19859 insn = emit_insn (tmp);
19860 RTX_FRAME_RELATED_P (insn) = 1;
19861 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19862 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19863 reg);
19864 RTX_FRAME_RELATED_P (tmp) = 1;
19865 i++;
19866 regno++;
19867 XVECEXP (dwarf, 0, i) = tmp;
19868 first = false;
19871 while (i < num_regs)
19872 if (saved_regs_mask & (1 << regno))
19874 rtx reg1, reg2, mem1, mem2;
19875 rtx tmp0, tmp1, tmp2;
19876 int regno2;
19878 /* Find the register to pair with this one. */
19879 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19880 regno2++)
19883 reg1 = gen_rtx_REG (SImode, regno);
19884 reg2 = gen_rtx_REG (SImode, regno2);
19886 if (first)
19888 rtx insn;
19890 first = false;
19891 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19892 stack_pointer_rtx,
19893 -4 * num_regs));
19894 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 -4 * (num_regs - 1)));
19897 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19898 plus_constant (Pmode, stack_pointer_rtx,
19899 -4 * (num_regs)));
19900 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19901 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19902 RTX_FRAME_RELATED_P (tmp0) = 1;
19903 RTX_FRAME_RELATED_P (tmp1) = 1;
19904 RTX_FRAME_RELATED_P (tmp2) = 1;
19905 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19906 XVECEXP (par, 0, 0) = tmp0;
19907 XVECEXP (par, 0, 1) = tmp1;
19908 XVECEXP (par, 0, 2) = tmp2;
19909 insn = emit_insn (par);
19910 RTX_FRAME_RELATED_P (insn) = 1;
19911 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19913 else
19915 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19916 stack_pointer_rtx,
19917 4 * i));
19918 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19919 stack_pointer_rtx,
19920 4 * (i + 1)));
19921 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19922 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19923 RTX_FRAME_RELATED_P (tmp1) = 1;
19924 RTX_FRAME_RELATED_P (tmp2) = 1;
19925 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19926 XVECEXP (par, 0, 0) = tmp1;
19927 XVECEXP (par, 0, 1) = tmp2;
19928 emit_insn (par);
19931 /* Create unwind information. This is an approximation. */
19932 tmp1 = gen_rtx_SET (VOIDmode,
19933 gen_frame_mem (Pmode,
19934 plus_constant (Pmode,
19935 stack_pointer_rtx,
19936 4 * i)),
19937 reg1);
19938 tmp2 = gen_rtx_SET (VOIDmode,
19939 gen_frame_mem (Pmode,
19940 plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * (i + 1))),
19943 reg2);
19945 RTX_FRAME_RELATED_P (tmp1) = 1;
19946 RTX_FRAME_RELATED_P (tmp2) = 1;
19947 XVECEXP (dwarf, 0, i + 1) = tmp1;
19948 XVECEXP (dwarf, 0, i + 2) = tmp2;
19949 i += 2;
19950 regno = regno2 + 1;
19952 else
19953 regno++;
19955 return;
19958 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19959 whenever possible, otherwise it emits single-word stores. The first store
19960 also allocates stack space for all saved registers, using writeback with
19961 post-addressing mode. All other stores use offset addressing. If no STRD
19962 can be emitted, this function emits a sequence of single-word stores,
19963 and not an STM as before, because single-word stores provide more
19964 scheduling freedom and can be turned into an STM by peephole optimizations. */
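/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r7}, where only r4/r5 form the even/odd consecutive pair that
   ARM-mode STRD requires, this emits roughly

     strd  r4, r5, [sp, #-12]!   @ first store allocates all 12 bytes
     str   r7, [sp, #8]  */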
19965 static void
19966 arm_emit_strd_push (unsigned long saved_regs_mask)
19968 int num_regs = 0;
19969 int i, j, dwarf_index = 0;
19970 int offset = 0;
19971 rtx dwarf = NULL_RTX;
19972 rtx insn = NULL_RTX;
19973 rtx tmp, mem;
19975 /* TODO: More efficient code can be emitted by changing the
19976 layout, e.g., first push all pairs that can use STRD to keep the
19977 stack aligned, and then push all other registers. */
19978 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19979 if (saved_regs_mask & (1 << i))
19980 num_regs++;
19982 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19983 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19984 gcc_assert (num_regs > 0);
19986 /* Create sequence for DWARF info. */
19987 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19989 /* For dwarf info, we generate explicit stack update. */
19990 tmp = gen_rtx_SET (VOIDmode,
19991 stack_pointer_rtx,
19992 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19993 RTX_FRAME_RELATED_P (tmp) = 1;
19994 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19996 /* Save registers. */
19997 offset = - 4 * num_regs;
19998 j = 0;
19999 while (j <= LAST_ARM_REGNUM)
20000 if (saved_regs_mask & (1 << j))
20002 if ((j % 2 == 0)
20003 && (saved_regs_mask & (1 << (j + 1))))
20005 /* The current register and the next register form a register pair
20006 for which STRD can be generated.
20007 if (offset < 0)
20009 /* Allocate stack space for all saved registers. */
20010 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20011 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20012 mem = gen_frame_mem (DImode, tmp);
20013 offset = 0;
20015 else if (offset > 0)
20016 mem = gen_frame_mem (DImode,
20017 plus_constant (Pmode,
20018 stack_pointer_rtx,
20019 offset));
20020 else
20021 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20023 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
20024 RTX_FRAME_RELATED_P (tmp) = 1;
20025 tmp = emit_insn (tmp);
20027 /* Record the first store insn. */
20028 if (dwarf_index == 1)
20029 insn = tmp;
20031 /* Generate dwarf info. */
20032 mem = gen_frame_mem (SImode,
20033 plus_constant (Pmode,
20034 stack_pointer_rtx,
20035 offset));
20036 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20040 mem = gen_frame_mem (SImode,
20041 plus_constant (Pmode,
20042 stack_pointer_rtx,
20043 offset + 4));
20044 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
20045 RTX_FRAME_RELATED_P (tmp) = 1;
20046 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20048 offset += 8;
20049 j += 2;
20051 else
20053 /* Emit a single word store. */
20054 if (offset < 0)
20056 /* Allocate stack space for all saved registers. */
20057 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20058 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20059 mem = gen_frame_mem (SImode, tmp);
20060 offset = 0;
20062 else if (offset > 0)
20063 mem = gen_frame_mem (SImode,
20064 plus_constant (Pmode,
20065 stack_pointer_rtx,
20066 offset));
20067 else
20068 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20070 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20071 RTX_FRAME_RELATED_P (tmp) = 1;
20072 tmp = emit_insn (tmp);
20074 /* Record the first store insn. */
20075 if (dwarf_index == 1)
20076 insn = tmp;
20078 /* Generate dwarf info. */
20079 mem = gen_frame_mem (SImode,
20080 plus_constant(Pmode,
20081 stack_pointer_rtx,
20082 offset));
20083 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20087 offset += 4;
20088 j += 1;
20091 else
20092 j++;
20094 /* Attach dwarf info to the first insn we generate. */
20095 gcc_assert (insn != NULL_RTX);
20096 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20097 RTX_FRAME_RELATED_P (insn) = 1;
20100 /* Generate and emit an insn that we will recognize as a push_multi.
20101 Unfortunately, since this insn does not reflect very well the actual
20102 semantics of the operation, we need to annotate the insn for the benefit
20103 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20104 MASK for registers that should be annotated for DWARF2 frame unwind
20105 information. */
20106 static rtx
20107 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20109 int num_regs = 0;
20110 int num_dwarf_regs = 0;
20111 int i, j;
20112 rtx par;
20113 rtx dwarf;
20114 int dwarf_par_index;
20115 rtx tmp, reg;
20117 /* We don't record the PC in the dwarf frame information. */
20118 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20120 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20122 if (mask & (1 << i))
20123 num_regs++;
20124 if (dwarf_regs_mask & (1 << i))
20125 num_dwarf_regs++;
20128 gcc_assert (num_regs && num_regs <= 16);
20129 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20131 /* For the body of the insn we are going to generate an UNSPEC in
20132 parallel with several USEs. This allows the insn to be recognized
20133 by the push_multi pattern in the arm.md file.
20135 The body of the insn looks something like this:
20137 (parallel [
20138 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20139 (const_int:SI <num>)))
20140 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20141 (use (reg:SI XX))
20142 (use (reg:SI YY))
20146 For the frame note however, we try to be more explicit and actually
20147 show each register being stored into the stack frame, plus a (single)
20148 decrement of the stack pointer. We do it this way in order to be
20149 friendly to the stack unwinding code, which only wants to see a single
20150 stack decrement per instruction. The RTL we generate for the note looks
20151 something like this:
20153 (sequence [
20154 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20155 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20156 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20157 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20161 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20162 instead we'd have a parallel expression detailing all
20163 the stores to the various memory addresses so that debug
20164 information is more up-to-date. Remember however while writing
20165 this to take care of the constraints with the push instruction.
20167 Note also that this has to be taken care of for the VFP registers.
20169 For more see PR43399. */
20171 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20172 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20173 dwarf_par_index = 1;
20175 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20177 if (mask & (1 << i))
20179 reg = gen_rtx_REG (SImode, i);
20181 XVECEXP (par, 0, 0)
20182 = gen_rtx_SET (VOIDmode,
20183 gen_frame_mem
20184 (BLKmode,
20185 gen_rtx_PRE_MODIFY (Pmode,
20186 stack_pointer_rtx,
20187 plus_constant
20188 (Pmode, stack_pointer_rtx,
20189 -4 * num_regs))
20191 gen_rtx_UNSPEC (BLKmode,
20192 gen_rtvec (1, reg),
20193 UNSPEC_PUSH_MULT));
20195 if (dwarf_regs_mask & (1 << i))
20197 tmp = gen_rtx_SET (VOIDmode,
20198 gen_frame_mem (SImode, stack_pointer_rtx),
20199 reg);
20200 RTX_FRAME_RELATED_P (tmp) = 1;
20201 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20204 break;
20208 for (j = 1, i++; j < num_regs; i++)
20210 if (mask & (1 << i))
20212 reg = gen_rtx_REG (SImode, i);
20214 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20216 if (dwarf_regs_mask & (1 << i))
20219 = gen_rtx_SET (VOIDmode,
20220 gen_frame_mem
20221 (SImode,
20222 plus_constant (Pmode, stack_pointer_rtx,
20223 4 * j)),
20224 reg);
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 j++;
20233 par = emit_insn (par);
20235 tmp = gen_rtx_SET (VOIDmode,
20236 stack_pointer_rtx,
20237 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20238 RTX_FRAME_RELATED_P (tmp) = 1;
20239 XVECEXP (dwarf, 0, 0) = tmp;
20241 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20243 return par;
20246 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20247 SIZE is the offset to be adjusted.
20248 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20249 static void
20250 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20252 rtx dwarf;
20254 RTX_FRAME_RELATED_P (insn) = 1;
20255 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20256 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20259 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20260 SAVED_REGS_MASK shows which registers need to be restored.
20262 Unfortunately, since this insn does not reflect very well the actual
20263 semantics of the operation, we need to annotate the insn for the benefit
20264 of DWARF2 frame unwind information. */
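/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, pc} the parallel built below matches the pop_multi pattern and
   assembles to roughly "ldmfd sp!, {r4, r5, pc}"; a mask with a single
   register and no PC instead takes the early "ldr rN, [sp], #4" path.  */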
20265 static void
20266 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20268 int num_regs = 0;
20269 int i, j;
20270 rtx par;
20271 rtx dwarf = NULL_RTX;
20272 rtx tmp, reg;
20273 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20274 int offset_adj;
20275 int emit_update;
20277 offset_adj = return_in_pc ? 1 : 0;
20278 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20279 if (saved_regs_mask & (1 << i))
20280 num_regs++;
20282 gcc_assert (num_regs && num_regs <= 16);
20284 /* If SP is in the reglist, then we don't emit an SP update insn. */
20285 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20287 /* The parallel needs to hold num_regs SETs
20288 and one SET for the stack update. */
20289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20291 if (return_in_pc)
20292 XVECEXP (par, 0, 0) = ret_rtx;
20294 if (emit_update)
20296 /* Increment the stack pointer, based on there being
20297 num_regs 4-byte registers to restore. */
20298 tmp = gen_rtx_SET (VOIDmode,
20299 stack_pointer_rtx,
20300 plus_constant (Pmode,
20301 stack_pointer_rtx,
20302 4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, offset_adj) = tmp;
20307 /* Now restore every reg, which may include PC. */
20308 for (j = 0, i = 0; j < num_regs; i++)
20309 if (saved_regs_mask & (1 << i))
20311 reg = gen_rtx_REG (SImode, i);
20312 if ((num_regs == 1) && emit_update && !return_in_pc)
20314 /* Emit single load with writeback. */
20315 tmp = gen_frame_mem (SImode,
20316 gen_rtx_POST_INC (Pmode,
20317 stack_pointer_rtx));
20318 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20319 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20320 return;
20323 tmp = gen_rtx_SET (VOIDmode,
20324 reg,
20325 gen_frame_mem
20326 (SImode,
20327 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20328 RTX_FRAME_RELATED_P (tmp) = 1;
20329 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20331 /* We need to maintain a sequence for DWARF info too. As dwarf info
20332 should not have PC, skip PC. */
20333 if (i != PC_REGNUM)
20334 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20336 j++;
20339 if (return_in_pc)
20340 par = emit_jump_insn (par);
20341 else
20342 par = emit_insn (par);
20344 REG_NOTES (par) = dwarf;
20345 if (!return_in_pc)
20346 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20347 stack_pointer_rtx, stack_pointer_rtx);
20350 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20351 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20353 Unfortunately, since this insn does not reflect very well the actual
20354 semantics of the operation, we need to annotate the insn for the benefit
20355 of DWARF2 frame unwind information. */
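/* Illustrative arithmetic only: a single pop_multi can restore at most 16
   D-registers, so a request for, say, 20 registers starting at FIRST_REG is
   split below into two calls covering 16 and then 4 registers.  */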
20356 static void
20357 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20359 int i, j;
20360 rtx par;
20361 rtx dwarf = NULL_RTX;
20362 rtx tmp, reg;
20364 gcc_assert (num_regs && num_regs <= 32);
20366 /* Workaround ARM10 VFPr1 bug. */
20367 if (num_regs == 2 && !arm_arch6)
20369 if (first_reg == 15)
20370 first_reg--;
20372 num_regs++;
20375 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20376 there could be up to 32 D-registers to restore.
20377 If there are more than 16 D-registers, make two recursive calls,
20378 each of which emits one pop_multi instruction. */
20379 if (num_regs > 16)
20381 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20382 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20383 return;
20386 /* The parallel needs to hold num_regs SETs
20387 and one SET for the stack update. */
20388 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20390 /* Increment the stack pointer, based on there being
20391 num_regs 8-byte registers to restore. */
20392 tmp = gen_rtx_SET (VOIDmode,
20393 base_reg,
20394 plus_constant (Pmode, base_reg, 8 * num_regs));
20395 RTX_FRAME_RELATED_P (tmp) = 1;
20396 XVECEXP (par, 0, 0) = tmp;
20398 /* Now show every reg that will be restored, using a SET for each. */
20399 for (j = 0, i=first_reg; j < num_regs; i += 2)
20401 reg = gen_rtx_REG (DFmode, i);
20403 tmp = gen_rtx_SET (VOIDmode,
20404 reg,
20405 gen_frame_mem
20406 (DFmode,
20407 plus_constant (Pmode, base_reg, 8 * j)));
20408 RTX_FRAME_RELATED_P (tmp) = 1;
20409 XVECEXP (par, 0, j + 1) = tmp;
20411 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20413 j++;
20416 par = emit_insn (par);
20417 REG_NOTES (par) = dwarf;
20419 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20420 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20422 RTX_FRAME_RELATED_P (par) = 1;
20423 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20425 else
20426 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20427 base_reg, base_reg);
20430 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20431 even number of registers is being popped, an LDRD pattern is created for
20432 each register pair. If an odd number of registers is popped, the last
20433 register is loaded using an LDR pattern. */
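/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r6} this emits roughly

     ldrd  r4, r5, [sp]
     add   sp, sp, #8
     ldr   r6, [sp], #4

   and when PC is also in the mask the final single-word load becomes the
   return itself, or the remaining registers are popped together with PC by
   arm_emit_multi_reg_pop.  */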
20434 static void
20435 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20437 int num_regs = 0;
20438 int i, j;
20439 rtx par = NULL_RTX;
20440 rtx dwarf = NULL_RTX;
20441 rtx tmp, reg, tmp1;
20442 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20445 if (saved_regs_mask & (1 << i))
20446 num_regs++;
20448 gcc_assert (num_regs && num_regs <= 16);
20450 /* We cannot generate LDRD for PC, so reduce the count if PC is
20451 to be popped. If num_regs was even it now becomes odd,
20452 and we can generate a pop with PC. If num_regs was odd, it is
20453 now even, and an LDR with return can be generated for PC. */
20454 if (return_in_pc)
20455 num_regs--;
20457 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20459 /* Var j iterates over all the registers in saved_regs_mask, while var i
20460 gives the index of each saved register in the stack frame.
20461 A PARALLEL RTX for each register pair is created here, so that the
20462 LDRD pattern can be matched. As PC is always the last register to be
20463 popped, and we have already decremented num_regs if PC is present,
20464 we don't have to worry about PC in this loop. */
20465 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20466 if (saved_regs_mask & (1 << j))
20468 /* Create RTX for memory load. */
20469 reg = gen_rtx_REG (SImode, j);
20470 tmp = gen_rtx_SET (SImode,
20471 reg,
20472 gen_frame_mem (SImode,
20473 plus_constant (Pmode,
20474 stack_pointer_rtx, 4 * i)));
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20477 if (i % 2 == 0)
20479 /* When saved-register index (i) is even, the RTX to be emitted is
20480 yet to be created. Hence create it first. The LDRD pattern we
20481 are generating is :
20482 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20483 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20484 where target registers need not be consecutive. */
20485 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20486 dwarf = NULL_RTX;
20489 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20490 added as the 0th element; if i is odd, reg_i is added as the 1st element
20491 of the LDRD pattern shown above. */
20492 XVECEXP (par, 0, (i % 2)) = tmp;
20493 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20495 if ((i % 2) == 1)
20497 /* When saved-register index (i) is odd, RTXs for both the registers
20498 to be loaded are generated in above given LDRD pattern, and the
20499 pattern can be emitted now. */
20500 par = emit_insn (par);
20501 REG_NOTES (par) = dwarf;
20502 RTX_FRAME_RELATED_P (par) = 1;
20505 i++;
20508 /* If the number of registers popped is odd and return_in_pc is false, or
20509 the number of registers is even and return_in_pc is true, the last register
20510 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20511 and then use LDR with post-increment. */
20513 /* Increment the stack pointer, based on there being
20514 num_regs 4-byte registers to restore. */
20515 tmp = gen_rtx_SET (VOIDmode,
20516 stack_pointer_rtx,
20517 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20518 RTX_FRAME_RELATED_P (tmp) = 1;
20519 tmp = emit_insn (tmp);
20520 if (!return_in_pc)
20522 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20523 stack_pointer_rtx, stack_pointer_rtx);
20526 dwarf = NULL_RTX;
20528 if (((num_regs % 2) == 1 && !return_in_pc)
20529 || ((num_regs % 2) == 0 && return_in_pc))
20531 /* Scan for the single register to be popped. Skip until the saved
20532 register is found. */
20533 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20535 /* Gen LDR with post increment here. */
20536 tmp1 = gen_rtx_MEM (SImode,
20537 gen_rtx_POST_INC (SImode,
20538 stack_pointer_rtx));
20539 set_mem_alias_set (tmp1, get_frame_alias_set ());
20541 reg = gen_rtx_REG (SImode, j);
20542 tmp = gen_rtx_SET (SImode, reg, tmp1);
20543 RTX_FRAME_RELATED_P (tmp) = 1;
20544 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20546 if (return_in_pc)
20548 /* If return_in_pc, j must be PC_REGNUM. */
20549 gcc_assert (j == PC_REGNUM);
20550 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20551 XVECEXP (par, 0, 0) = ret_rtx;
20552 XVECEXP (par, 0, 1) = tmp;
20553 par = emit_jump_insn (par);
20555 else
20557 par = emit_insn (tmp);
20558 REG_NOTES (par) = dwarf;
20559 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20560 stack_pointer_rtx, stack_pointer_rtx);
20564 else if ((num_regs % 2) == 1 && return_in_pc)
20566 /* There are 2 registers to be popped. So, generate the pattern
20567 pop_multiple_with_stack_update_and_return to pop in PC. */
20568 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20571 return;
20574 /* LDRD in ARM mode needs consecutive registers as operands. This function
20575 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20576 offset addressing and then generates one separate stack update. This provides
20577 more scheduling freedom, compared to writeback on every load. However,
20578 if the function returns using load into PC directly
20579 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20580 before the last load. TODO: Add a peephole optimization to recognize
20581 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20582 peephole optimization to merge the load at stack-offset zero
20583 with the stack update instruction using load with writeback
20584 in post-index addressing mode. */
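/* Illustrative sketch (not literal output): for SAVED_REGS_MASK covering
   {r4, r5, r7} this emits roughly

     ldrd  r4, r5, [sp]
     ldr   r7, [sp, #8]
     add   sp, sp, #12

   and if PC is in the mask the stack update is emitted first, followed by a
   final "ldr pc, [sp], #4" that doubles as the return.  */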
20585 static void
20586 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20588 int j = 0;
20589 int offset = 0;
20590 rtx par = NULL_RTX;
20591 rtx dwarf = NULL_RTX;
20592 rtx tmp, mem;
20594 /* Restore saved registers. */
20595 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20596 j = 0;
20597 while (j <= LAST_ARM_REGNUM)
20598 if (saved_regs_mask & (1 << j))
20600 if ((j % 2) == 0
20601 && (saved_regs_mask & (1 << (j + 1)))
20602 && (j + 1) != PC_REGNUM)
20604 /* Current register and next register form register pair for which
20605 LDRD can be generated. PC is always the last register popped, and
20606 we handle it separately. */
20607 if (offset > 0)
20608 mem = gen_frame_mem (DImode,
20609 plus_constant (Pmode,
20610 stack_pointer_rtx,
20611 offset));
20612 else
20613 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20615 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20616 tmp = emit_insn (tmp);
20617 RTX_FRAME_RELATED_P (tmp) = 1;
20619 /* Generate dwarf info. */
20621 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20622 gen_rtx_REG (SImode, j),
20623 NULL_RTX);
20624 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20625 gen_rtx_REG (SImode, j + 1),
20626 dwarf);
20628 REG_NOTES (tmp) = dwarf;
20630 offset += 8;
20631 j += 2;
20633 else if (j != PC_REGNUM)
20635 /* Emit a single word load. */
20636 if (offset > 0)
20637 mem = gen_frame_mem (SImode,
20638 plus_constant (Pmode,
20639 stack_pointer_rtx,
20640 offset));
20641 else
20642 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20644 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20645 tmp = emit_insn (tmp);
20646 RTX_FRAME_RELATED_P (tmp) = 1;
20648 /* Generate dwarf info. */
20649 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20650 gen_rtx_REG (SImode, j),
20651 NULL_RTX);
20653 offset += 4;
20654 j += 1;
20656 else /* j == PC_REGNUM */
20657 j++;
20659 else
20660 j++;
20662 /* Update the stack. */
20663 if (offset > 0)
20665 tmp = gen_rtx_SET (Pmode,
20666 stack_pointer_rtx,
20667 plus_constant (Pmode,
20668 stack_pointer_rtx,
20669 offset));
20670 tmp = emit_insn (tmp);
20671 arm_add_cfa_adjust_cfa_note (tmp, offset,
20672 stack_pointer_rtx, stack_pointer_rtx);
20673 offset = 0;
20676 if (saved_regs_mask & (1 << PC_REGNUM))
20678 /* Only PC is to be popped. */
20679 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20680 XVECEXP (par, 0, 0) = ret_rtx;
20681 tmp = gen_rtx_SET (SImode,
20682 gen_rtx_REG (SImode, PC_REGNUM),
20683 gen_frame_mem (SImode,
20684 gen_rtx_POST_INC (SImode,
20685 stack_pointer_rtx)));
20686 RTX_FRAME_RELATED_P (tmp) = 1;
20687 XVECEXP (par, 0, 1) = tmp;
20688 par = emit_jump_insn (par);
20690 /* Generate dwarf info. */
20691 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20692 gen_rtx_REG (SImode, PC_REGNUM),
20693 NULL_RTX);
20694 REG_NOTES (par) = dwarf;
20695 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20696 stack_pointer_rtx, stack_pointer_rtx);
20700 /* Calculate the size of the return value that is passed in registers. */
20701 static unsigned
20702 arm_size_return_regs (void)
20704 machine_mode mode;
20706 if (crtl->return_rtx != 0)
20707 mode = GET_MODE (crtl->return_rtx);
20708 else
20709 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20711 return GET_MODE_SIZE (mode);
20714 /* Return true if the current function needs to save/restore LR. */
20715 static bool
20716 thumb_force_lr_save (void)
20718 return !cfun->machine->lr_save_eliminated
20719 && (!leaf_function_p ()
20720 || thumb_far_jump_used_p ()
20721 || df_regs_ever_live_p (LR_REGNUM));
20724 /* We do not know if r3 will be available, because
20725 an indirect tailcall is happening in this
20726 particular case. */
20727 static bool
20728 is_indirect_tailcall_p (rtx call)
20730 rtx pat = PATTERN (call);
20732 /* Indirect tail call. */
20733 pat = XVECEXP (pat, 0, 0);
20734 if (GET_CODE (pat) == SET)
20735 pat = SET_SRC (pat);
20737 pat = XEXP (XEXP (pat, 0), 0);
20738 return REG_P (pat);
20741 /* Return true if r3 is used by any of the tail call insns in the
20742 current function. */
20743 static bool
20744 any_sibcall_could_use_r3 (void)
20746 edge_iterator ei;
20747 edge e;
20749 if (!crtl->tail_call_emit)
20750 return false;
20751 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20752 if (e->flags & EDGE_SIBCALL)
20754 rtx call = BB_END (e->src);
20755 if (!CALL_P (call))
20756 call = prev_nonnote_nondebug_insn (call);
20757 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20758 if (find_regno_fusage (call, USE, 3)
20759 || is_indirect_tailcall_p (call))
20760 return true;
20762 return false;
20766 /* Compute the distance from register FROM to register TO.
20767 These can be the arg pointer (26), the soft frame pointer (25),
20768 the stack pointer (13) or the hard frame pointer (11).
20769 In thumb mode r7 is used as the soft frame pointer, if needed.
20770 Typical stack layout looks like this:
20772 old stack pointer -> | |
20773 ----
20774 | | \
20775 | | saved arguments for
20776 | | vararg functions
20777 | | /
20779 hard FP & arg pointer -> | | \
20780 | | stack
20781 | | frame
20782 | | /
20784 | | \
20785 | | call saved
20786 | | registers
20787 soft frame pointer -> | | /
20789 | | \
20790 | | local
20791 | | variables
20792 locals base pointer -> | | /
20794 | | \
20795 | | outgoing
20796 | | arguments
20797 current stack pointer -> | | /
20800 For a given function some or all of these stack components
20801 may not be needed, giving rise to the possibility of
20802 eliminating some of the registers.
20804 The values returned by this function must reflect the behavior
20805 of arm_expand_prologue() and arm_compute_save_reg_mask().
20807 The sign of the number returned reflects the direction of stack
20808 growth, so the values are positive for all eliminations except
20809 from the soft frame pointer to the hard frame pointer.
20811 SFP may point just inside the local variables block to ensure correct
20812 alignment. */
20815 /* Calculate stack offsets. These are used to calculate register elimination
20816 offsets and in prologue/epilogue code. Also calculates which registers
20817 should be saved. */
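/* Illustrative example, under assumptions not taken from the source: for an
   ARM-mode non-leaf function with no pretend args, no static chain, a
   16-byte (word-rounded) frame, 8 bytes of outgoing arguments, no caller
   interworking slot and saved_regs_mask covering {r4, lr}, the values
   computed below are saved_args = 0, saved_regs = 8, soft_frame = 8,
   locals_base = 24 and outgoing_args = 32, which already satisfies the
   doubleword-alignment checks.  */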
20819 static arm_stack_offsets *
20820 arm_get_frame_offsets (void)
20822 struct arm_stack_offsets *offsets;
20823 unsigned long func_type;
20824 int leaf;
20825 int saved;
20826 int core_saved;
20827 HOST_WIDE_INT frame_size;
20828 int i;
20830 offsets = &cfun->machine->stack_offsets;
20832 /* We need to know if we are a leaf function. Unfortunately, it
20833 is possible to be called after start_sequence has been called,
20834 which causes get_insns to return the insns for the sequence,
20835 not the function, which will cause leaf_function_p to return
20836 the incorrect result.
20838 However, we only need to know about leaf functions once reload has completed, and the
20839 frame size cannot be changed after that time, so we can safely
20840 use the cached value. */
20842 if (reload_completed)
20843 return offsets;
20845 /* Initially this is the size of the local variables. It will be translated
20846 into an offset once we have determined the size of preceding data. */
20847 frame_size = ROUND_UP_WORD (get_frame_size ());
20849 leaf = leaf_function_p ();
20851 /* Space for variadic functions. */
20852 offsets->saved_args = crtl->args.pretend_args_size;
20854 /* In Thumb mode this is incorrect, but never used. */
20855 offsets->frame
20856 = (offsets->saved_args
20857 + arm_compute_static_chain_stack_bytes ()
20858 + (frame_pointer_needed ? 4 : 0));
20860 if (TARGET_32BIT)
20862 unsigned int regno;
20864 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20865 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20866 saved = core_saved;
20868 /* We know that SP will be doubleword aligned on entry, and we must
20869 preserve that condition at any subroutine call. We also require the
20870 soft frame pointer to be doubleword aligned. */
20872 if (TARGET_REALLY_IWMMXT)
20874 /* Check for the call-saved iWMMXt registers. */
20875 for (regno = FIRST_IWMMXT_REGNUM;
20876 regno <= LAST_IWMMXT_REGNUM;
20877 regno++)
20878 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20879 saved += 8;
20882 func_type = arm_current_func_type ();
20883 /* Space for saved VFP registers. */
20884 if (! IS_VOLATILE (func_type)
20885 && TARGET_HARD_FLOAT && TARGET_VFP)
20886 saved += arm_get_vfp_saved_size ();
20888 else /* TARGET_THUMB1 */
20890 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20891 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20892 saved = core_saved;
20893 if (TARGET_BACKTRACE)
20894 saved += 16;
20897 /* Saved registers include the stack frame. */
20898 offsets->saved_regs
20899 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20900 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20902 /* A leaf function does not need any stack alignment if it has nothing
20903 on the stack. */
20904 if (leaf && frame_size == 0
20905 /* However if it calls alloca(), we have a dynamically allocated
20906 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20907 && ! cfun->calls_alloca)
20909 offsets->outgoing_args = offsets->soft_frame;
20910 offsets->locals_base = offsets->soft_frame;
20911 return offsets;
20914 /* Ensure SFP has the correct alignment. */
20915 if (ARM_DOUBLEWORD_ALIGN
20916 && (offsets->soft_frame & 7))
20918 offsets->soft_frame += 4;
20919 /* Try to align stack by pushing an extra reg. Don't bother doing this
20920 when there is a stack frame as the alignment will be rolled into
20921 the normal stack adjustment. */
20922 if (frame_size + crtl->outgoing_args_size == 0)
20924 int reg = -1;
20926 /* Register r3 is caller-saved. Normally it does not need to be
20927 saved on entry by the prologue. However if we choose to save
20928 it for padding then we may confuse the compiler into thinking
20929 a prologue sequence is required when in fact it is not. This
20930 will occur when shrink-wrapping if r3 is used as a scratch
20931 register and there are no other callee-saved writes.
20933 This situation can be avoided when other callee-saved registers
20934 are available: since r3 is not mandatory, we can choose a callee-saved
20935 register for padding instead. */
20936 bool prefer_callee_reg_p = false;
20938 /* If it is safe to use r3, then do so. This sometimes
20939 generates better code on Thumb-2 by avoiding the need to
20940 use 32-bit push/pop instructions. */
20941 if (! any_sibcall_could_use_r3 ()
20942 && arm_size_return_regs () <= 12
20943 && (offsets->saved_regs_mask & (1 << 3)) == 0
20944 && (TARGET_THUMB2
20945 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20947 reg = 3;
20948 if (!TARGET_THUMB2)
20949 prefer_callee_reg_p = true;
20951 if (reg == -1
20952 || prefer_callee_reg_p)
20954 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20956 /* Avoid fixed registers; they may be changed at
20957 arbitrary times so it's unsafe to restore them
20958 during the epilogue. */
20959 if (!fixed_regs[i]
20960 && (offsets->saved_regs_mask & (1 << i)) == 0)
20962 reg = i;
20963 break;
20968 if (reg != -1)
20970 offsets->saved_regs += 4;
20971 offsets->saved_regs_mask |= (1 << reg);
20976 offsets->locals_base = offsets->soft_frame + frame_size;
20977 offsets->outgoing_args = (offsets->locals_base
20978 + crtl->outgoing_args_size);
20980 if (ARM_DOUBLEWORD_ALIGN)
20982 /* Ensure SP remains doubleword aligned. */
20983 if (offsets->outgoing_args & 7)
20984 offsets->outgoing_args += 4;
20985 gcc_assert (!(offsets->outgoing_args & 7));
20988 return offsets;
20992 /* Calculate the relative offsets for the different stack pointers. Positive
20993 offsets are in the direction of stack growth. */
20995 HOST_WIDE_INT
20996 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20998 arm_stack_offsets *offsets;
21000 offsets = arm_get_frame_offsets ();
21002 /* OK, now we have enough information to compute the distances.
21003 There must be an entry in these switch tables for each pair
21004 of registers in ELIMINABLE_REGS, even if some of the entries
21005 seem to be redundant or useless. */
21006 switch (from)
21008 case ARG_POINTER_REGNUM:
21009 switch (to)
21011 case THUMB_HARD_FRAME_POINTER_REGNUM:
21012 return 0;
21014 case FRAME_POINTER_REGNUM:
21015 /* This is the reverse of the soft frame pointer
21016 to hard frame pointer elimination below. */
21017 return offsets->soft_frame - offsets->saved_args;
21019 case ARM_HARD_FRAME_POINTER_REGNUM:
21020 /* This is only non-zero in the case where the static chain register
21021 is stored above the frame. */
21022 return offsets->frame - offsets->saved_args - 4;
21024 case STACK_POINTER_REGNUM:
21025 /* If nothing has been pushed on the stack at all
21026 then this will return -4. This *is* correct! */
21027 return offsets->outgoing_args - (offsets->saved_args + 4);
21029 default:
21030 gcc_unreachable ();
21032 gcc_unreachable ();
21034 case FRAME_POINTER_REGNUM:
21035 switch (to)
21037 case THUMB_HARD_FRAME_POINTER_REGNUM:
21038 return 0;
21040 case ARM_HARD_FRAME_POINTER_REGNUM:
21041 /* The hard frame pointer points to the top entry in the
21042 stack frame. The soft frame pointer to the bottom entry
21043 in the stack frame. If there is no stack frame at all,
21044 then they are identical. */
21046 return offsets->frame - offsets->soft_frame;
21048 case STACK_POINTER_REGNUM:
21049 return offsets->outgoing_args - offsets->soft_frame;
21051 default:
21052 gcc_unreachable ();
21054 gcc_unreachable ();
21056 default:
21057 /* You cannot eliminate from the stack pointer.
21058 In theory you could eliminate from the hard frame
21059 pointer to the stack pointer, but this will never
21060 happen, since if a stack frame is not needed the
21061 hard frame pointer will never be used. */
21062 gcc_unreachable ();
21066 /* Given FROM and TO register numbers, say whether this elimination is
21067 allowed. Frame pointer elimination is automatically handled.
21069 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21070 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21071 pointer, we must eliminate FRAME_POINTER_REGNUM into
21072 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21073 ARG_POINTER_REGNUM. */
21075 bool
21076 arm_can_eliminate (const int from, const int to)
21078 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21079 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21080 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21081 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21082 true);
21085 /* Emit RTL to save coprocessor registers on function entry. Returns the
21086 number of bytes pushed. */
21088 static int
21089 arm_save_coproc_regs(void)
21091 int saved_size = 0;
21092 unsigned reg;
21093 unsigned start_reg;
21094 rtx insn;
21096 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21097 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21099 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21100 insn = gen_rtx_MEM (V2SImode, insn);
21101 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21102 RTX_FRAME_RELATED_P (insn) = 1;
21103 saved_size += 8;
21106 if (TARGET_HARD_FLOAT && TARGET_VFP)
21108 start_reg = FIRST_VFP_REGNUM;
21110 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21112 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21113 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21115 if (start_reg != reg)
21116 saved_size += vfp_emit_fstmd (start_reg,
21117 (reg - start_reg) / 2);
21118 start_reg = reg + 2;
21121 if (start_reg != reg)
21122 saved_size += vfp_emit_fstmd (start_reg,
21123 (reg - start_reg) / 2);
21125 return saved_size;
21129 /* Set the Thumb frame pointer from the stack pointer. */
21131 static void
21132 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21134 HOST_WIDE_INT amount;
21135 rtx insn, dwarf;
21137 amount = offsets->outgoing_args - offsets->locals_base;
21138 if (amount < 1024)
21139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21140 stack_pointer_rtx, GEN_INT (amount)));
21141 else
21143 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21144 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21145 expects the first two operands to be the same. */
21146 if (TARGET_THUMB2)
21148 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21149 stack_pointer_rtx,
21150 hard_frame_pointer_rtx));
21152 else
21154 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21155 hard_frame_pointer_rtx,
21156 stack_pointer_rtx));
21158 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21159 plus_constant (Pmode, stack_pointer_rtx, amount));
21160 RTX_FRAME_RELATED_P (dwarf) = 1;
21161 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21164 RTX_FRAME_RELATED_P (insn) = 1;
21167 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21168 function. */
21169 void
21170 arm_expand_prologue (void)
21172 rtx amount;
21173 rtx insn;
21174 rtx ip_rtx;
21175 unsigned long live_regs_mask;
21176 unsigned long func_type;
21177 int fp_offset = 0;
21178 int saved_pretend_args = 0;
21179 int saved_regs = 0;
21180 unsigned HOST_WIDE_INT args_to_push;
21181 arm_stack_offsets *offsets;
21183 func_type = arm_current_func_type ();
21185 /* Naked functions don't have prologues. */
21186 if (IS_NAKED (func_type))
21187 return;
21189 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21190 args_to_push = crtl->args.pretend_args_size;
21192 /* Compute which register we will have to save onto the stack. */
21193 offsets = arm_get_frame_offsets ();
21194 live_regs_mask = offsets->saved_regs_mask;
21196 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21198 if (IS_STACKALIGN (func_type))
21200 rtx r0, r1;
21202 /* Handle a word-aligned stack pointer. We generate the following:
21204 mov r0, sp
21205 bic r1, r0, #7
21206 mov sp, r1
21207 <save and restore r0 in normal prologue/epilogue>
21208 mov sp, r0
21209 bx lr
21211 The unwinder doesn't need to know about the stack realignment.
21212 Just tell it we saved SP in r0. */
21213 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21215 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21216 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21218 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21219 RTX_FRAME_RELATED_P (insn) = 1;
21220 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21222 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21224 /* ??? The CFA changes here, which may cause GDB to conclude that it
21225 has entered a different function. That said, the unwind info is
21226 correct, individually, before and after this instruction because
21227 we've described the save of SP, which will override the default
21228 handling of SP as restoring from the CFA. */
21229 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21232 /* For APCS frames, if IP register is clobbered
21233 when creating frame, save that register in a special
21234 way. */
21235 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21237 if (IS_INTERRUPT (func_type))
21239 /* Interrupt functions must not corrupt any registers.
21240 Creating a frame pointer however, corrupts the IP
21241 register, so we must push it first. */
21242 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21244 /* Do not set RTX_FRAME_RELATED_P on this insn.
21245 The dwarf stack unwinding code only wants to see one
21246 stack decrement per function, and this is not it. If
21247 this instruction is labeled as being part of the frame
21248 creation sequence then dwarf2out_frame_debug_expr will
21249 die when it encounters the assignment of IP to FP
21250 later on, since the use of SP here establishes SP as
21251 the CFA register and not IP.
21253 Anyway this instruction is not really part of the stack
21254 frame creation although it is part of the prologue. */
21256 else if (IS_NESTED (func_type))
21258 /* The static chain register is the same as the IP register
21259 used as a scratch register during stack frame creation.
21260 To get around this we need to find somewhere to store IP
21261 whilst the frame is being created. We try the following
21262 places in order:
21264 1. The last argument register r3 if it is available.
21265 2. A slot on the stack above the frame if there are no
21266 arguments to push onto the stack.
21267 3. Register r3 again, after pushing the argument registers
21268 onto the stack, if this is a varargs function.
21269 4. The last slot on the stack created for the arguments to
21270 push, if this isn't a varargs function.
21272 Note - we only need to tell the dwarf2 backend about the SP
21273 adjustment in the second variant; the static chain register
21274 doesn't need to be unwound, as it doesn't contain a value
21275 inherited from the caller. */
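	 /* Illustrative sketch of option 2 above (not literal output): with no
	    arguments to push and r3 unavailable, IP is spilled with
	    "str ip, [sp, #-4]!" and recovered later in this function from the
	    slot at [fp, #4] once the APCS frame pointer has been set up.  */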
21277 if (!arm_r3_live_at_start_p ())
21278 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21279 else if (args_to_push == 0)
21281 rtx addr, dwarf;
21283 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21284 saved_regs += 4;
21286 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21287 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21288 fp_offset = 4;
21290 /* Just tell the dwarf backend that we adjusted SP. */
21291 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21292 plus_constant (Pmode, stack_pointer_rtx,
21293 -fp_offset));
21294 RTX_FRAME_RELATED_P (insn) = 1;
21295 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21297 else
21299 /* Store the args on the stack. */
21300 if (cfun->machine->uses_anonymous_args)
21302 insn
21303 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21304 (0xf0 >> (args_to_push / 4)) & 0xf);
21305 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21306 saved_pretend_args = 1;
21308 else
21310 rtx addr, dwarf;
21312 if (args_to_push == 4)
21313 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21314 else
21315 addr
21316 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21317 plus_constant (Pmode,
21318 stack_pointer_rtx,
21319 -args_to_push));
21321 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21323 /* Just tell the dwarf backend that we adjusted SP. */
21324 dwarf
21325 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21326 plus_constant (Pmode, stack_pointer_rtx,
21327 -args_to_push));
21328 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21331 RTX_FRAME_RELATED_P (insn) = 1;
21332 fp_offset = args_to_push;
21333 args_to_push = 0;
21337 insn = emit_set_insn (ip_rtx,
21338 plus_constant (Pmode, stack_pointer_rtx,
21339 fp_offset));
21340 RTX_FRAME_RELATED_P (insn) = 1;
21343 if (args_to_push)
21345 /* Push the argument registers, or reserve space for them. */
21346 if (cfun->machine->uses_anonymous_args)
21347 insn = emit_multi_reg_push
21348 ((0xf0 >> (args_to_push / 4)) & 0xf,
21349 (0xf0 >> (args_to_push / 4)) & 0xf);
21350 else
21351 insn = emit_insn
21352 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21353 GEN_INT (- args_to_push)));
21354 RTX_FRAME_RELATED_P (insn) = 1;
21357 /* If this is an interrupt service routine, and the link register
21358 is going to be pushed, and we're not generating the extra
21359 push of IP (needed when a frame is needed and the frame layout is APCS),
21360 subtracting four from LR now will mean that the function return
21361 can be done with a single instruction. */
21362 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21363 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21364 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21365 && TARGET_ARM)
21367 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21369 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21372 if (live_regs_mask)
21374 unsigned long dwarf_regs_mask = live_regs_mask;
21376 saved_regs += bit_count (live_regs_mask) * 4;
21377 if (optimize_size && !frame_pointer_needed
21378 && saved_regs == offsets->saved_regs - offsets->saved_args)
21380 /* If no coprocessor registers are being pushed and we don't have
21381 to worry about a frame pointer then push extra registers to
21382 create the stack frame. This is done is a way that does not
21383 alter the frame layout, so is independent of the epilogue. */
21384 int n;
21385 int frame;
21386 n = 0;
21387 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21388 n++;
21389 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21390 if (frame && n * 4 >= frame)
21392 n = frame / 4;
21393 live_regs_mask |= (1 << n) - 1;
21394 saved_regs += frame;
21398 if (TARGET_LDRD
21399 && current_tune->prefer_ldrd_strd
21400 && !optimize_function_for_size_p (cfun))
21402 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21403 if (TARGET_THUMB2)
21404 thumb2_emit_strd_push (live_regs_mask);
21405 else if (TARGET_ARM
21406 && !TARGET_APCS_FRAME
21407 && !IS_INTERRUPT (func_type))
21408 arm_emit_strd_push (live_regs_mask);
21409 else
21411 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21412 RTX_FRAME_RELATED_P (insn) = 1;
21415 else
21417 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21418 RTX_FRAME_RELATED_P (insn) = 1;
21422 if (! IS_VOLATILE (func_type))
21423 saved_regs += arm_save_coproc_regs ();
21425 if (frame_pointer_needed && TARGET_ARM)
21427 /* Create the new frame pointer. */
21428 if (TARGET_APCS_FRAME)
21430 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21431 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21432 RTX_FRAME_RELATED_P (insn) = 1;
21434 if (IS_NESTED (func_type))
21436 /* Recover the static chain register. */
21437 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21438 insn = gen_rtx_REG (SImode, 3);
21439 else
21441 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21442 insn = gen_frame_mem (SImode, insn);
21444 emit_set_insn (ip_rtx, insn);
21445 /* Add a USE to stop propagate_one_insn() from barfing. */
21446 emit_insn (gen_force_register_use (ip_rtx));
21449 else
21451 insn = GEN_INT (saved_regs - 4);
21452 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21453 stack_pointer_rtx, insn));
21454 RTX_FRAME_RELATED_P (insn) = 1;
21458 if (flag_stack_usage_info)
21459 current_function_static_stack_size
21460 = offsets->outgoing_args - offsets->saved_args;
21462 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21464 /* This add can produce multiple insns for a large constant, so we
21465 need to get tricky. */
21466 rtx_insn *last = get_last_insn ();
21468 amount = GEN_INT (offsets->saved_args + saved_regs
21469 - offsets->outgoing_args);
21471 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21472 amount));
21475 last = last ? NEXT_INSN (last) : get_insns ();
21476 RTX_FRAME_RELATED_P (last) = 1;
21478 while (last != insn);
21480 /* If the frame pointer is needed, emit a special barrier that
21481 will prevent the scheduler from moving stores to the frame
21482 before the stack adjustment. */
21483 if (frame_pointer_needed)
21484 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21485 hard_frame_pointer_rtx));
21489 if (frame_pointer_needed && TARGET_THUMB2)
21490 thumb_set_frame_pointer (offsets);
21492 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21494 unsigned long mask;
21496 mask = live_regs_mask;
21497 mask &= THUMB2_WORK_REGS;
21498 if (!IS_NESTED (func_type))
21499 mask |= (1 << IP_REGNUM);
21500 arm_load_pic_register (mask);
21503 /* If we are profiling, make sure no instructions are scheduled before
21504 the call to mcount. Similarly if the user has requested no
21505 scheduling in the prolog. Similarly if we want non-call exceptions
21506 using the EABI unwinder, to prevent faulting instructions from being
21507 swapped with a stack adjustment. */
21508 if (crtl->profile || !TARGET_SCHED_PROLOG
21509 || (arm_except_unwind_info (&global_options) == UI_TARGET
21510 && cfun->can_throw_non_call_exceptions))
21511 emit_insn (gen_blockage ());
21513 /* If the link register is being kept alive, with the return address in it,
21514 then make sure that it does not get reused by the ce2 pass. */
21515 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21516 cfun->machine->lr_save_eliminated = 1;
21519 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21520 static void
21521 arm_print_condition (FILE *stream)
21523 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21525 /* Branch conversion is not implemented for Thumb-2. */
21526 if (TARGET_THUMB)
21528 output_operand_lossage ("predicated Thumb instruction");
21529 return;
21531 if (current_insn_predicate != NULL)
21533 output_operand_lossage
21534 ("predicated instruction in conditional sequence");
21535 return;
21538 fputs (arm_condition_codes[arm_current_cc], stream);
21540 else if (current_insn_predicate)
21542 enum arm_cond_code code;
21544 if (TARGET_THUMB1)
21546 output_operand_lossage ("predicated Thumb instruction");
21547 return;
21550 code = get_arm_condition_code (current_insn_predicate);
21551 fputs (arm_condition_codes[code], stream);
21556 /* Globally reserved letters: acln
21557 Punctuation letters currently used: @_|?().!#
21558 Lower case letters currently used: bcdefhimpqtvwxyz
21559 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21560 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21562 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21564 If CODE is 'd', then the X is a condition operand and the instruction
21565 should only be executed if the condition is true.
21566 If CODE is 'D', then the X is a condition operand and the instruction
21567 should only be executed if the condition is false: however, if the mode
21568 of the comparison is CCFPEmode, then always execute the instruction -- we
21569 do this because in these circumstances !GE does not necessarily imply LT;
21570 in these cases the instruction pattern will take care to make sure that
21571 an instruction containing %d will follow, thereby undoing the effects of
21572 doing this instruction unconditionally.
21573 If CODE is 'N' then X is a floating point operand that must be negated
21574 before output.
21575 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21576 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
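/* A stand-alone sketch (host-side C only, not compiled as part of the
   backend) of the values the 'B' and 'b' codes print for small CONST_INTs,
   assuming a 64-bit HOST_WIDE_INT.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long long x = 5;
  /* 'B': bitwise inverse, sign-extended from 32 bits, so ~5 prints -6.  */
  long long b = (long long) (int) (unsigned int) ~x;
  printf ("%%B of %lld prints %lld\n", x, b);

  /* 'b': log2 of a power-of-two constant, so 8 prints #3.  */
  long long y = 8;
  printf ("%%b of %lld prints #%d\n", y, __builtin_ctzll (y & 0xffffffff));
  return 0;
}
#endif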
21577 static void
21578 arm_print_operand (FILE *stream, rtx x, int code)
21580 switch (code)
21582 case '@':
21583 fputs (ASM_COMMENT_START, stream);
21584 return;
21586 case '_':
21587 fputs (user_label_prefix, stream);
21588 return;
21590 case '|':
21591 fputs (REGISTER_PREFIX, stream);
21592 return;
21594 case '?':
21595 arm_print_condition (stream);
21596 return;
21598 case '(':
21599 /* Nothing in unified syntax, otherwise the current condition code. */
21600 if (!TARGET_UNIFIED_ASM)
21601 arm_print_condition (stream);
21602 break;
21604 case ')':
21605 /* The current condition code in unified syntax, otherwise nothing. */
21606 if (TARGET_UNIFIED_ASM)
21607 arm_print_condition (stream);
21608 break;
21610 case '.':
21611 /* The current condition code for a condition code setting instruction.
21612 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21613 if (TARGET_UNIFIED_ASM)
21615 fputc('s', stream);
21616 arm_print_condition (stream);
21618 else
21620 arm_print_condition (stream);
21621 fputc('s', stream);
21623 return;
21625 case '!':
21626 /* If the instruction is conditionally executed then print
21627 the current condition code, otherwise print 's'. */
21628 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21629 if (current_insn_predicate)
21630 arm_print_condition (stream);
21631 else
21632 fputc('s', stream);
21633 break;
21635 /* %# is a "break" sequence. It doesn't output anything, but is used to
21636 separate e.g. operand numbers from following text, if that text consists
21637 of further digits which we don't want to be part of the operand
21638 number. */
21639 case '#':
21640 return;
21642 case 'N':
21644 REAL_VALUE_TYPE r;
21645 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21646 r = real_value_negate (&r);
21647 fprintf (stream, "%s", fp_const_from_val (&r));
21649 return;
21651 /* An integer or symbol address without a preceding # sign. */
21652 case 'c':
21653 switch (GET_CODE (x))
21655 case CONST_INT:
21656 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21657 break;
21659 case SYMBOL_REF:
21660 output_addr_const (stream, x);
21661 break;
21663 case CONST:
21664 if (GET_CODE (XEXP (x, 0)) == PLUS
21665 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21667 output_addr_const (stream, x);
21668 break;
21670 /* Fall through. */
21672 default:
21673 output_operand_lossage ("Unsupported operand for code '%c'", code);
21675 return;
21677 /* An integer that we want to print in HEX. */
21678 case 'x':
21679 switch (GET_CODE (x))
21681 case CONST_INT:
21682 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21683 break;
21685 default:
21686 output_operand_lossage ("Unsupported operand for code '%c'", code);
21688 return;
21690 case 'B':
21691 if (CONST_INT_P (x))
21693 HOST_WIDE_INT val;
21694 val = ARM_SIGN_EXTEND (~INTVAL (x));
21695 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21697 else
21699 putc ('~', stream);
21700 output_addr_const (stream, x);
21702 return;
21704 case 'b':
21705 /* Print the log2 of a CONST_INT. */
21707 HOST_WIDE_INT val;
21709 if (!CONST_INT_P (x)
21710 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21711 output_operand_lossage ("Unsupported operand for code '%c'", code);
21712 else
21713 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21715 return;
21717 case 'L':
21718 /* The low 16 bits of an immediate constant. */
21719 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21720 return;
21722 case 'i':
21723 fprintf (stream, "%s", arithmetic_instr (x, 1));
21724 return;
21726 case 'I':
21727 fprintf (stream, "%s", arithmetic_instr (x, 0));
21728 return;
21730 case 'S':
21732 HOST_WIDE_INT val;
21733 const char *shift;
21735 shift = shift_op (x, &val);
21737 if (shift)
21739 fprintf (stream, ", %s ", shift);
21740 if (val == -1)
21741 arm_print_operand (stream, XEXP (x, 1), 0);
21742 else
21743 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21746 return;
21748 /* An explanation of the 'Q', 'R' and 'H' register operands:
21750 In a pair of registers containing a DI or DF value the 'Q'
21751 operand returns the register number of the register containing
21752 the least significant part of the value. The 'R' operand returns
21753 the register number of the register containing the most
21754 significant part of the value.
21756 The 'H' operand returns the higher of the two register numbers.
21757 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21758 same as the 'Q' operand, since the most significant part of the
21759 value is held in the lower number register. The reverse is true
21760 on systems where WORDS_BIG_ENDIAN is false.
21762 The purpose of these operands is to distinguish between cases
21763 where the endian-ness of the values is important (for example
21764 when they are added together), and cases where the endian-ness
21765 is irrelevant, but the order of register operations is important.
21766 For example when loading a value from memory into a register
21767 pair, the endian-ness does not matter. Provided that the value
21768 from the lower memory address is put into the lower numbered
21769 register, and the value from the higher address is put into the
21770 higher numbered register, the load will work regardless of whether
21771 the value being loaded is big-wordian or little-wordian. The
21772 order of the two register loads can matter however, if the address
21773 of the memory location is actually held in one of the registers
21774 being overwritten by the load.
21776 The 'Q' and 'R' constraints are also available for 64-bit
21777 constants. */
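/* A stand-alone sketch (not compiled here) of how 'Q', 'R' and 'H' resolve
   for a doubleword value held in the register pair {rN, rN+1}, mirroring the
   WORDS_BIG_ENDIAN adjustments in the cases below.  */
#if 0
#include <stdio.h>

static void
show (int regno, int words_big_endian)
{
  int q = regno + (words_big_endian ? 1 : 0);	/* least significant word */
  int r = regno + (words_big_endian ? 0 : 1);	/* most significant word */
  int h = regno + 1;				/* higher-numbered register */
  printf ("WORDS_BIG_ENDIAN=%d: %%Q=r%d %%R=r%d %%H=r%d\n",
	  words_big_endian, q, r, h);
}

int
main (void)
{
  show (0, 0);	/* little-wordian: %Q=r0 %R=r1 %H=r1 */
  show (0, 1);	/* big-wordian:    %Q=r1 %R=r0 %H=r1 */
  return 0;
}
#endif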
21778 case 'Q':
21779 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21781 rtx part = gen_lowpart (SImode, x);
21782 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21783 return;
21786 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21788 output_operand_lossage ("invalid operand for code '%c'", code);
21789 return;
21792 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21793 return;
21795 case 'R':
21796 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21798 machine_mode mode = GET_MODE (x);
21799 rtx part;
21801 if (mode == VOIDmode)
21802 mode = DImode;
21803 part = gen_highpart_mode (SImode, mode, x);
21804 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21805 return;
21808 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21814 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21815 return;
21817 case 'H':
21818 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21820 output_operand_lossage ("invalid operand for code '%c'", code);
21821 return;
21824 asm_fprintf (stream, "%r", REGNO (x) + 1);
21825 return;
21827 case 'J':
21828 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21830 output_operand_lossage ("invalid operand for code '%c'", code);
21831 return;
21834 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21835 return;
21837 case 'K':
21838 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21840 output_operand_lossage ("invalid operand for code '%c'", code);
21841 return;
21844 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21845 return;
21847 case 'm':
21848 asm_fprintf (stream, "%r",
21849 REG_P (XEXP (x, 0))
21850 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21851 return;
21853 case 'M':
21854 asm_fprintf (stream, "{%r-%r}",
21855 REGNO (x),
21856 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21857 return;
21859 /* Like 'M', but writing doubleword vector registers, for use by Neon
21860 insns. */
21861 case 'h':
21863 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21864 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21865 if (numregs == 1)
21866 asm_fprintf (stream, "{d%d}", regno);
21867 else
21868 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21870 return;
21872 case 'd':
21873 /* CONST_TRUE_RTX means always -- that's the default. */
21874 if (x == const_true_rtx)
21875 return;
21877 if (!COMPARISON_P (x))
21879 output_operand_lossage ("invalid operand for code '%c'", code);
21880 return;
21883 fputs (arm_condition_codes[get_arm_condition_code (x)],
21884 stream);
21885 return;
21887 case 'D':
21888 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21889 want to do that. */
21890 if (x == const_true_rtx)
21892 output_operand_lossage ("instruction never executed");
21893 return;
21895 if (!COMPARISON_P (x))
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21902 (get_arm_condition_code (x))],
21903 stream);
21904 return;
21906 case 's':
21907 case 'V':
21908 case 'W':
21909 case 'X':
21910 case 'Y':
21911 case 'Z':
21912 /* Former Maverick support, removed after GCC-4.7. */
21913 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21914 return;
21916 case 'U':
21917 if (!REG_P (x)
21918 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21919 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21920 /* Bad value for wCG register number. */
21922 output_operand_lossage ("invalid operand for code '%c'", code);
21923 return;
21926 else
21927 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21928 return;
21930 /* Print an iWMMXt control register name. */
21931 case 'w':
21932 if (!CONST_INT_P (x)
21933 || INTVAL (x) < 0
21934 || INTVAL (x) >= 16)
21935 /* Bad value for wC register number. */
21937 output_operand_lossage ("invalid operand for code '%c'", code);
21938 return;
21941 else
21943 static const char * wc_reg_names [16] =
21945 "wCID", "wCon", "wCSSF", "wCASF",
21946 "wC4", "wC5", "wC6", "wC7",
21947 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21948 "wC12", "wC13", "wC14", "wC15"
21951 fputs (wc_reg_names [INTVAL (x)], stream);
21953 return;
21955 /* Print the high single-precision register of a VFP double-precision
21956 register. */
21957 case 'p':
21959 machine_mode mode = GET_MODE (x);
21960 int regno;
21962 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21964 output_operand_lossage ("invalid operand for code '%c'", code);
21965 return;
21968 regno = REGNO (x);
21969 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21971 output_operand_lossage ("invalid operand for code '%c'", code);
21972 return;
21975 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21977 return;
21979 /* Print a VFP/Neon double precision or quad precision register name. */
21980 case 'P':
21981 case 'q':
21983 machine_mode mode = GET_MODE (x);
21984 int is_quad = (code == 'q');
21985 int regno;
21987 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21989 output_operand_lossage ("invalid operand for code '%c'", code);
21990 return;
21993 if (!REG_P (x)
21994 || !IS_VFP_REGNUM (REGNO (x)))
21996 output_operand_lossage ("invalid operand for code '%c'", code);
21997 return;
22000 regno = REGNO (x);
22001 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22002 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22004 output_operand_lossage ("invalid operand for code '%c'", code);
22005 return;
22008 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22009 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22011 return;
22013 /* These two codes print the low/high doubleword register of a Neon quad
22014 register, respectively. For pair-structure types, can also print
22015 low/high quadword registers. */
22016 case 'e':
22017 case 'f':
22019 machine_mode mode = GET_MODE (x);
22020 int regno;
22022 if ((GET_MODE_SIZE (mode) != 16
22023 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22025 output_operand_lossage ("invalid operand for code '%c'", code);
22026 return;
22029 regno = REGNO (x);
22030 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22032 output_operand_lossage ("invalid operand for code '%c'", code);
22033 return;
22036 if (GET_MODE_SIZE (mode) == 16)
22037 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22038 + (code == 'f' ? 1 : 0));
22039 else
22040 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22041 + (code == 'f' ? 1 : 0));
22043 return;
22045 /* Print a VFPv3 floating-point constant, represented as an integer
22046 index. */
22047 case 'G':
22049 int index = vfp3_const_double_index (x);
22050 gcc_assert (index != -1);
22051 fprintf (stream, "%d", index);
22053 return;
22055 /* Print bits representing opcode features for Neon.
22057 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22058 and polynomials as unsigned.
22060 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22062 Bit 2 is 1 for rounding functions, 0 otherwise. */
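/* A stand-alone sketch (not compiled here) showing how these feature bits map
   onto the letters printed by the 'T', 'F', 't' and 'O' codes below.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int bits;
  for (bits = 0; bits < 8; bits++)
    printf ("bits=%d  T=%c  F=%c  t=%c  O=%s\n",
	    bits,
	    "uspf"[bits & 3],		/* 'T': u/s/p/f */
	    "iipf"[bits & 3],		/* 'F': signedness folded to 'i' */
	    "usuf"[bits & 3],		/* 't': 'p' emitted as 'u' */
	    (bits & 4) != 0 ? "r" : "");	/* 'O': rounding suffix */
  return 0;
}
#endif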
22064 /* Identify the type as 's', 'u', 'p' or 'f'. */
22065 case 'T':
22067 HOST_WIDE_INT bits = INTVAL (x);
22068 fputc ("uspf"[bits & 3], stream);
22070 return;
22072 /* Likewise, but signed and unsigned integers are both 'i'. */
22073 case 'F':
22075 HOST_WIDE_INT bits = INTVAL (x);
22076 fputc ("iipf"[bits & 3], stream);
22078 return;
22080 /* As for 'T', but emit 'u' instead of 'p'. */
22081 case 't':
22083 HOST_WIDE_INT bits = INTVAL (x);
22084 fputc ("usuf"[bits & 3], stream);
22086 return;
22088 /* Bit 2: rounding (vs none). */
22089 case 'O':
22091 HOST_WIDE_INT bits = INTVAL (x);
22092 fputs ((bits & 4) != 0 ? "r" : "", stream);
22094 return;
22096 /* Memory operand for vld1/vst1 instruction. */
22097 case 'A':
22099 rtx addr;
22100 bool postinc = FALSE;
22101 rtx postinc_reg = NULL;
22102 unsigned align, memsize, align_bits;
22104 gcc_assert (MEM_P (x));
22105 addr = XEXP (x, 0);
22106 if (GET_CODE (addr) == POST_INC)
22108 postinc = 1;
22109 addr = XEXP (addr, 0);
22111 if (GET_CODE (addr) == POST_MODIFY)
22113 postinc_reg = XEXP( XEXP (addr, 1), 1);
22114 addr = XEXP (addr, 0);
22116 asm_fprintf (stream, "[%r", REGNO (addr));
22118 /* We know the alignment of this access, so we can emit a hint in the
22119 instruction (for some alignments) as an aid to the memory subsystem
22120 of the target. */
22121 align = MEM_ALIGN (x) >> 3;
22122 memsize = MEM_SIZE (x);
22124 /* Only certain alignment specifiers are supported by the hardware. */
22125 if (memsize == 32 && (align % 32) == 0)
22126 align_bits = 256;
22127 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22128 align_bits = 128;
22129 else if (memsize >= 8 && (align % 8) == 0)
22130 align_bits = 64;
22131 else
22132 align_bits = 0;
22134 if (align_bits != 0)
22135 asm_fprintf (stream, ":%d", align_bits);
22137 asm_fprintf (stream, "]");
22139 if (postinc)
22140 fputs("!", stream);
22141 if (postinc_reg)
22142 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22144 return;
22146 case 'C':
22148 rtx addr;
22150 gcc_assert (MEM_P (x));
22151 addr = XEXP (x, 0);
22152 gcc_assert (REG_P (addr));
22153 asm_fprintf (stream, "[%r]", REGNO (addr));
22155 return;
22157 /* Translate an S register number into a D register number and element index. */
22158 case 'y':
22160 machine_mode mode = GET_MODE (x);
22161 int regno;
22163 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22165 output_operand_lossage ("invalid operand for code '%c'", code);
22166 return;
22169 regno = REGNO (x);
22170 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22172 output_operand_lossage ("invalid operand for code '%c'", code);
22173 return;
22176 regno = regno - FIRST_VFP_REGNUM;
22177 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22179 return;
22181 case 'v':
22182 gcc_assert (CONST_DOUBLE_P (x));
22183 int result;
22184 result = vfp3_const_double_for_fract_bits (x);
22185 if (result == 0)
22186 result = vfp3_const_double_for_bits (x);
22187 fprintf (stream, "#%d", result);
22188 return;
22190 /* Register specifier for vld1.16/vst1.16. Translate the S register
22191 number into a D register number and element index. */
22192 case 'z':
22194 machine_mode mode = GET_MODE (x);
22195 int regno;
22197 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22199 output_operand_lossage ("invalid operand for code '%c'", code);
22200 return;
22203 regno = REGNO (x);
22204 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22206 output_operand_lossage ("invalid operand for code '%c'", code);
22207 return;
22210 regno = regno - FIRST_VFP_REGNUM;
22211 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22213 return;
22215 default:
22216 if (x == 0)
22218 output_operand_lossage ("missing operand");
22219 return;
22222 switch (GET_CODE (x))
22224 case REG:
22225 asm_fprintf (stream, "%r", REGNO (x));
22226 break;
22228 case MEM:
22229 output_memory_reference_mode = GET_MODE (x);
22230 output_address (XEXP (x, 0));
22231 break;
22233 case CONST_DOUBLE:
22235 char fpstr[20];
22236 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22237 sizeof (fpstr), 0, 1);
22238 fprintf (stream, "#%s", fpstr);
22240 break;
22242 default:
22243 gcc_assert (GET_CODE (x) != NEG);
22244 fputc ('#', stream);
22245 if (GET_CODE (x) == HIGH)
22247 fputs (":lower16:", stream);
22248 x = XEXP (x, 0);
22251 output_addr_const (stream, x);
22252 break;
22257 /* Target hook for printing a memory address. */
22258 static void
22259 arm_print_operand_address (FILE *stream, rtx x)
22261 if (TARGET_32BIT)
22263 int is_minus = GET_CODE (x) == MINUS;
22265 if (REG_P (x))
22266 asm_fprintf (stream, "[%r]", REGNO (x));
22267 else if (GET_CODE (x) == PLUS || is_minus)
22269 rtx base = XEXP (x, 0);
22270 rtx index = XEXP (x, 1);
22271 HOST_WIDE_INT offset = 0;
22272 if (!REG_P (base)
22273 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22275 /* Ensure that BASE is a register. */
22276 /* (one of them must be). */
22277 /* Also ensure the SP is not used as an index register. */
22278 std::swap (base, index);
22280 switch (GET_CODE (index))
22282 case CONST_INT:
22283 offset = INTVAL (index);
22284 if (is_minus)
22285 offset = -offset;
22286 asm_fprintf (stream, "[%r, #%wd]",
22287 REGNO (base), offset);
22288 break;
22290 case REG:
22291 asm_fprintf (stream, "[%r, %s%r]",
22292 REGNO (base), is_minus ? "-" : "",
22293 REGNO (index));
22294 break;
22296 case MULT:
22297 case ASHIFTRT:
22298 case LSHIFTRT:
22299 case ASHIFT:
22300 case ROTATERT:
22302 asm_fprintf (stream, "[%r, %s%r",
22303 REGNO (base), is_minus ? "-" : "",
22304 REGNO (XEXP (index, 0)));
22305 arm_print_operand (stream, index, 'S');
22306 fputs ("]", stream);
22307 break;
22310 default:
22311 gcc_unreachable ();
22314 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22315 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22317 extern machine_mode output_memory_reference_mode;
22319 gcc_assert (REG_P (XEXP (x, 0)));
22321 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22322 asm_fprintf (stream, "[%r, #%s%d]!",
22323 REGNO (XEXP (x, 0)),
22324 GET_CODE (x) == PRE_DEC ? "-" : "",
22325 GET_MODE_SIZE (output_memory_reference_mode));
22326 else
22327 asm_fprintf (stream, "[%r], #%s%d",
22328 REGNO (XEXP (x, 0)),
22329 GET_CODE (x) == POST_DEC ? "-" : "",
22330 GET_MODE_SIZE (output_memory_reference_mode));
22332 else if (GET_CODE (x) == PRE_MODIFY)
22334 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22335 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22336 asm_fprintf (stream, "#%wd]!",
22337 INTVAL (XEXP (XEXP (x, 1), 1)));
22338 else
22339 asm_fprintf (stream, "%r]!",
22340 REGNO (XEXP (XEXP (x, 1), 1)));
22342 else if (GET_CODE (x) == POST_MODIFY)
22344 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22345 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22346 asm_fprintf (stream, "#%wd",
22347 INTVAL (XEXP (XEXP (x, 1), 1)));
22348 else
22349 asm_fprintf (stream, "%r",
22350 REGNO (XEXP (XEXP (x, 1), 1)));
22352 else output_addr_const (stream, x);
22354 else
22356 if (REG_P (x))
22357 asm_fprintf (stream, "[%r]", REGNO (x));
22358 else if (GET_CODE (x) == POST_INC)
22359 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22360 else if (GET_CODE (x) == PLUS)
22362 gcc_assert (REG_P (XEXP (x, 0)));
22363 if (CONST_INT_P (XEXP (x, 1)))
22364 asm_fprintf (stream, "[%r, #%wd]",
22365 REGNO (XEXP (x, 0)),
22366 INTVAL (XEXP (x, 1)));
22367 else
22368 asm_fprintf (stream, "[%r, %r]",
22369 REGNO (XEXP (x, 0)),
22370 REGNO (XEXP (x, 1)));
22372 else
22373 output_addr_const (stream, x);
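/* Illustrative summary (not taken from the sources): the 32-bit assembler
   syntax the routine above produces for the common address forms, assuming a
   word access with base r0 and index r1, and unified syntax for the shifted
   form.

     (reg r0)                          ->  [r0]
     (plus r0 (const_int 4))           ->  [r0, #4]
     (minus r0 r1)                     ->  [r0, -r1]
     (plus r0 (mult r1 (const_int 4))) ->  [r0, r1, lsl #2]
     (pre_inc r0)                      ->  [r0, #4]!
     (post_dec r0)                     ->  [r0], #-4
     (pre_modify r0 (plus r0 r1))      ->  [r0, r1]!
     (post_modify r0 (plus r0 r1))     ->  [r0], r1  */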
22377 /* Target hook for indicating whether a punctuation character for
22378 TARGET_PRINT_OPERAND is valid. */
22379 static bool
22380 arm_print_operand_punct_valid_p (unsigned char code)
22382 return (code == '@' || code == '|' || code == '.'
22383 || code == '(' || code == ')' || code == '#'
22384 || (TARGET_32BIT && (code == '?'))
22385 || (TARGET_THUMB2 && (code == '!'))
22386 || (TARGET_THUMB && (code == '_')));
22389 /* Target hook for assembling integer objects. The ARM version needs to
22390 handle word-sized values specially. */
22391 static bool
22392 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22394 machine_mode mode;
22396 if (size == UNITS_PER_WORD && aligned_p)
22398 fputs ("\t.word\t", asm_out_file);
22399 output_addr_const (asm_out_file, x);
22401 /* Mark symbols as position independent. We only do this in the
22402 .text segment, not in the .data segment. */
22403 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22404 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22406 /* See legitimize_pic_address for an explanation of the
22407 TARGET_VXWORKS_RTP check. */
22408 if (!arm_pic_data_is_text_relative
22409 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22410 fputs ("(GOT)", asm_out_file);
22411 else
22412 fputs ("(GOTOFF)", asm_out_file);
22414 fputc ('\n', asm_out_file);
22415 return true;
22418 mode = GET_MODE (x);
22420 if (arm_vector_mode_supported_p (mode))
22422 int i, units;
22424 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22426 units = CONST_VECTOR_NUNITS (x);
22427 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22429 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22430 for (i = 0; i < units; i++)
22432 rtx elt = CONST_VECTOR_ELT (x, i);
22433 assemble_integer
22434 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22436 else
22437 for (i = 0; i < units; i++)
22439 rtx elt = CONST_VECTOR_ELT (x, i);
22440 REAL_VALUE_TYPE rval;
22442 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22444 assemble_real
22445 (rval, GET_MODE_INNER (mode),
22446 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22449 return true;
22452 return default_assemble_integer (x, size, aligned_p);
22455 static void
22456 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22458 section *s;
22460 if (!TARGET_AAPCS_BASED)
22462 (is_ctor ?
22463 default_named_section_asm_out_constructor
22464 : default_named_section_asm_out_destructor) (symbol, priority);
22465 return;
22468 /* Put these in the .init_array section, using a special relocation. */
22469 if (priority != DEFAULT_INIT_PRIORITY)
22471 char buf[18];
22472 sprintf (buf, "%s.%.5u",
22473 is_ctor ? ".init_array" : ".fini_array",
22474 priority);
22475 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22477 else if (is_ctor)
22478 s = ctors_section;
22479 else
22480 s = dtors_section;
22482 switch_to_section (s);
22483 assemble_align (POINTER_SIZE);
22484 fputs ("\t.word\t", asm_out_file);
22485 output_addr_const (asm_out_file, symbol);
22486 fputs ("(target1)\n", asm_out_file);
22489 /* Add a function to the list of static constructors. */
22491 static void
22492 arm_elf_asm_constructor (rtx symbol, int priority)
22494 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22497 /* Add a function to the list of static destructors. */
22499 static void
22500 arm_elf_asm_destructor (rtx symbol, int priority)
22502 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22505 /* A finite state machine takes care of noticing whether or not instructions
22506 can be conditionally executed, and thus decrease execution time and code
22507 size by deleting branch instructions. The fsm is controlled by
22508 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22510 /* The states of the fsm controlling condition codes are:
22511 0: normal, do nothing special
22512 1: make ASM_OUTPUT_OPCODE not output this instruction
22513 2: make ASM_OUTPUT_OPCODE not output this instruction
22514 3: make instructions conditional
22515 4: make instructions conditional
22517 State transitions (state->state by whom under condition):
22518 0 -> 1 final_prescan_insn if the `target' is a label
22519 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22520 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22521 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22522 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22523 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22524 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22525 (the target insn is arm_target_insn).
22527 If the jump clobbers the conditions then we use states 2 and 4.
22529 A similar thing can be done with conditional return insns.
22531 XXX In case the `target' is an unconditional branch, this conditionalising
22532 of the instructions always reduces code size, but not always execution
22533 time. But then, I want to reduce the code size to somewhere near what
22534 /bin/cc produces. */
22536 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22537 instructions. When a COND_EXEC instruction is seen the subsequent
22538 instructions are scanned so that multiple conditional instructions can be
22539 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22540 specify the length and true/false mask for the IT block. These will be
22541 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
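/* A stand-alone sketch (not compiled here) of the state transitions described
   above; the event names are invented for the illustration.  */
#if 0
enum ccfsm_event
{
  BRANCH_OVER_LABEL,		/* the `target' is a label */
  BRANCH_OVER_UNCOND_BRANCH,	/* the `target' is an unconditional branch */
  OPCODE_SUPPRESSED,		/* ASM_OUTPUT_OPCODE swallowed the branch */
  TARGET_LABEL_REACHED,
  TARGET_BRANCH_REACHED
};

static int
ccfsm_next_state (int state, enum ccfsm_event ev)
{
  switch (state)
    {
    case 0: return (ev == BRANCH_OVER_LABEL ? 1
		    : ev == BRANCH_OVER_UNCOND_BRANCH ? 2 : 0);
    case 1: return ev == OPCODE_SUPPRESSED ? 3 : 1;
    case 2: return ev == OPCODE_SUPPRESSED ? 4 : 2;
    case 3: return ev == TARGET_LABEL_REACHED ? 0 : 3;
    case 4: return ev == TARGET_BRANCH_REACHED ? 0 : 4;
    default: return 0;
    }
}
#endif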
22543 /* Returns the index of the ARM condition code string in
22544 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22545 COMPARISON should be an rtx like `(eq (...) (...))'. */
22547 enum arm_cond_code
22548 maybe_get_arm_condition_code (rtx comparison)
22550 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22551 enum arm_cond_code code;
22552 enum rtx_code comp_code = GET_CODE (comparison);
22554 if (GET_MODE_CLASS (mode) != MODE_CC)
22555 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22556 XEXP (comparison, 1));
22558 switch (mode)
22560 case CC_DNEmode: code = ARM_NE; goto dominance;
22561 case CC_DEQmode: code = ARM_EQ; goto dominance;
22562 case CC_DGEmode: code = ARM_GE; goto dominance;
22563 case CC_DGTmode: code = ARM_GT; goto dominance;
22564 case CC_DLEmode: code = ARM_LE; goto dominance;
22565 case CC_DLTmode: code = ARM_LT; goto dominance;
22566 case CC_DGEUmode: code = ARM_CS; goto dominance;
22567 case CC_DGTUmode: code = ARM_HI; goto dominance;
22568 case CC_DLEUmode: code = ARM_LS; goto dominance;
22569 case CC_DLTUmode: code = ARM_CC;
22571 dominance:
22572 if (comp_code == EQ)
22573 return ARM_INVERSE_CONDITION_CODE (code);
22574 if (comp_code == NE)
22575 return code;
22576 return ARM_NV;
22578 case CC_NOOVmode:
22579 switch (comp_code)
22581 case NE: return ARM_NE;
22582 case EQ: return ARM_EQ;
22583 case GE: return ARM_PL;
22584 case LT: return ARM_MI;
22585 default: return ARM_NV;
22588 case CC_Zmode:
22589 switch (comp_code)
22591 case NE: return ARM_NE;
22592 case EQ: return ARM_EQ;
22593 default: return ARM_NV;
22596 case CC_Nmode:
22597 switch (comp_code)
22599 case NE: return ARM_MI;
22600 case EQ: return ARM_PL;
22601 default: return ARM_NV;
22604 case CCFPEmode:
22605 case CCFPmode:
22606 /* We can handle all cases except UNEQ and LTGT. */
22607 switch (comp_code)
22609 case GE: return ARM_GE;
22610 case GT: return ARM_GT;
22611 case LE: return ARM_LS;
22612 case LT: return ARM_MI;
22613 case NE: return ARM_NE;
22614 case EQ: return ARM_EQ;
22615 case ORDERED: return ARM_VC;
22616 case UNORDERED: return ARM_VS;
22617 case UNLT: return ARM_LT;
22618 case UNLE: return ARM_LE;
22619 case UNGT: return ARM_HI;
22620 case UNGE: return ARM_PL;
22621 /* UNEQ and LTGT do not have a representation. */
22622 case UNEQ: /* Fall through. */
22623 case LTGT: /* Fall through. */
22624 default: return ARM_NV;
22627 case CC_SWPmode:
22628 switch (comp_code)
22630 case NE: return ARM_NE;
22631 case EQ: return ARM_EQ;
22632 case GE: return ARM_LE;
22633 case GT: return ARM_LT;
22634 case LE: return ARM_GE;
22635 case LT: return ARM_GT;
22636 case GEU: return ARM_LS;
22637 case GTU: return ARM_CC;
22638 case LEU: return ARM_CS;
22639 case LTU: return ARM_HI;
22640 default: return ARM_NV;
22643 case CC_Cmode:
22644 switch (comp_code)
22646 case LTU: return ARM_CS;
22647 case GEU: return ARM_CC;
22648 default: return ARM_NV;
22651 case CC_CZmode:
22652 switch (comp_code)
22654 case NE: return ARM_NE;
22655 case EQ: return ARM_EQ;
22656 case GEU: return ARM_CS;
22657 case GTU: return ARM_HI;
22658 case LEU: return ARM_LS;
22659 case LTU: return ARM_CC;
22660 default: return ARM_NV;
22663 case CC_NCVmode:
22664 switch (comp_code)
22666 case GE: return ARM_GE;
22667 case LT: return ARM_LT;
22668 case GEU: return ARM_CS;
22669 case LTU: return ARM_CC;
22670 default: return ARM_NV;
22673 case CCmode:
22674 switch (comp_code)
22676 case NE: return ARM_NE;
22677 case EQ: return ARM_EQ;
22678 case GE: return ARM_GE;
22679 case GT: return ARM_GT;
22680 case LE: return ARM_LE;
22681 case LT: return ARM_LT;
22682 case GEU: return ARM_CS;
22683 case GTU: return ARM_HI;
22684 case LEU: return ARM_LS;
22685 case LTU: return ARM_CC;
22686 default: return ARM_NV;
22689 default: gcc_unreachable ();
22693 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22694 static enum arm_cond_code
22695 get_arm_condition_code (rtx comparison)
22697 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22698 gcc_assert (code != ARM_NV);
22699 return code;
22702 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22703 instructions. */
22704 void
22705 thumb2_final_prescan_insn (rtx_insn *insn)
22707 rtx_insn *first_insn = insn;
22708 rtx body = PATTERN (insn);
22709 rtx predicate;
22710 enum arm_cond_code code;
22711 int n;
22712 int mask;
22713 int max;
22715 /* max_insns_skipped in the tune was already taken into account in the
22716 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22717 just emit the IT blocks as best we can. It does not make sense to split
22718 the IT blocks. */
22719 max = MAX_INSN_PER_IT_BLOCK;
22721 /* Remove the previous insn from the count of insns to be output. */
22722 if (arm_condexec_count)
22723 arm_condexec_count--;
22725 /* Nothing to do if we are already inside a conditional block. */
22726 if (arm_condexec_count)
22727 return;
22729 if (GET_CODE (body) != COND_EXEC)
22730 return;
22732 /* Conditional jumps are implemented directly. */
22733 if (JUMP_P (insn))
22734 return;
22736 predicate = COND_EXEC_TEST (body);
22737 arm_current_cc = get_arm_condition_code (predicate);
22739 n = get_attr_ce_count (insn);
22740 arm_condexec_count = 1;
22741 arm_condexec_mask = (1 << n) - 1;
22742 arm_condexec_masklen = n;
22743 /* See if subsequent instructions can be combined into the same block. */
22744 for (;;)
22746 insn = next_nonnote_insn (insn);
22748 /* Jumping into the middle of an IT block is illegal, so a label or
22749 barrier terminates the block. */
22750 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22751 break;
22753 body = PATTERN (insn);
22754 /* USE and CLOBBER aren't really insns, so just skip them. */
22755 if (GET_CODE (body) == USE
22756 || GET_CODE (body) == CLOBBER)
22757 continue;
22759 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22760 if (GET_CODE (body) != COND_EXEC)
22761 break;
22762 /* Maximum number of conditionally executed instructions in a block. */
22763 n = get_attr_ce_count (insn);
22764 if (arm_condexec_masklen + n > max)
22765 break;
22767 predicate = COND_EXEC_TEST (body);
22768 code = get_arm_condition_code (predicate);
22769 mask = (1 << n) - 1;
22770 if (arm_current_cc == code)
22771 arm_condexec_mask |= (mask << arm_condexec_masklen);
22772 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22773 break;
22775 arm_condexec_count++;
22776 arm_condexec_masklen += n;
22778 /* A jump must be the last instruction in a conditional block. */
22779 if (JUMP_P (insn))
22780 break;
22782 /* Restore recog_data (getting the attributes of other insns can
22783 destroy this array, but final.c assumes that it remains intact
22784 across this call). */
22785 extract_constrain_insn_cached (first_insn);
22788 void
22789 arm_final_prescan_insn (rtx_insn *insn)
22791 /* BODY will hold the body of INSN. */
22792 rtx body = PATTERN (insn);
22794 /* This will be 1 if trying to repeat the trick, and things need to be
22795 reversed if it appears to fail. */
22796 int reverse = 0;
22798 /* If we start with a return insn, we only succeed if we find another one. */
22799 int seeking_return = 0;
22800 enum rtx_code return_code = UNKNOWN;
22802 /* START_INSN will hold the insn from where we start looking. This is the
22803 first insn after the following code_label if REVERSE is true. */
22804 rtx_insn *start_insn = insn;
22806 /* If in state 4, check if the target branch is reached, in order to
22807 change back to state 0. */
22808 if (arm_ccfsm_state == 4)
22810 if (insn == arm_target_insn)
22812 arm_target_insn = NULL;
22813 arm_ccfsm_state = 0;
22815 return;
22818 /* If in state 3, it is possible to repeat the trick, if this insn is an
22819 unconditional branch to a label, and immediately following this branch
22820 is the previous target label which is only used once, and the label this
22821 branch jumps to is not too far off. */
22822 if (arm_ccfsm_state == 3)
22824 if (simplejump_p (insn))
22826 start_insn = next_nonnote_insn (start_insn);
22827 if (BARRIER_P (start_insn))
22829 /* XXX Isn't this always a barrier? */
22830 start_insn = next_nonnote_insn (start_insn);
22832 if (LABEL_P (start_insn)
22833 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22834 && LABEL_NUSES (start_insn) == 1)
22835 reverse = TRUE;
22836 else
22837 return;
22839 else if (ANY_RETURN_P (body))
22841 start_insn = next_nonnote_insn (start_insn);
22842 if (BARRIER_P (start_insn))
22843 start_insn = next_nonnote_insn (start_insn);
22844 if (LABEL_P (start_insn)
22845 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22846 && LABEL_NUSES (start_insn) == 1)
22848 reverse = TRUE;
22849 seeking_return = 1;
22850 return_code = GET_CODE (body);
22852 else
22853 return;
22855 else
22856 return;
22859 gcc_assert (!arm_ccfsm_state || reverse);
22860 if (!JUMP_P (insn))
22861 return;
22863 /* This jump might be paralleled with a clobber of the condition codes;
22864 the jump should always come first. */
22865 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22866 body = XVECEXP (body, 0, 0);
22868 if (reverse
22869 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22870 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22872 int insns_skipped;
22873 int fail = FALSE, succeed = FALSE;
22874 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22875 int then_not_else = TRUE;
22876 rtx_insn *this_insn = start_insn;
22877 rtx label = 0;
22879 /* Register the insn jumped to. */
22880 if (reverse)
22882 if (!seeking_return)
22883 label = XEXP (SET_SRC (body), 0);
22885 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22886 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22887 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22889 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22890 then_not_else = FALSE;
22892 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22894 seeking_return = 1;
22895 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22897 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22899 seeking_return = 1;
22900 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22901 then_not_else = FALSE;
22903 else
22904 gcc_unreachable ();
22906 /* See how many insns this branch skips, and what kind of insns. If all
22907 insns are okay, and the label or unconditional branch to the same
22908 label is not too far away, succeed. */
22909 for (insns_skipped = 0;
22910 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22912 rtx scanbody;
22914 this_insn = next_nonnote_insn (this_insn);
22915 if (!this_insn)
22916 break;
22918 switch (GET_CODE (this_insn))
22920 case CODE_LABEL:
22921 /* Succeed if it is the target label, otherwise fail since
22922 control falls in from somewhere else. */
22923 if (this_insn == label)
22925 arm_ccfsm_state = 1;
22926 succeed = TRUE;
22928 else
22929 fail = TRUE;
22930 break;
22932 case BARRIER:
22933 /* Succeed if the following insn is the target label.
22934 Otherwise fail.
22935 If return insns are used then the last insn in a function
22936 will be a barrier. */
22937 this_insn = next_nonnote_insn (this_insn);
22938 if (this_insn && this_insn == label)
22940 arm_ccfsm_state = 1;
22941 succeed = TRUE;
22943 else
22944 fail = TRUE;
22945 break;
22947 case CALL_INSN:
22948 /* The AAPCS says that conditional calls should not be
22949 used since they make interworking inefficient (the
22950 linker can't transform BL<cond> into BLX). That's
22951 only a problem if the machine has BLX. */
22952 if (arm_arch5)
22954 fail = TRUE;
22955 break;
22958 /* Succeed if the following insn is the target label, or
22959 if the following two insns are a barrier and the
22960 target label. */
22961 this_insn = next_nonnote_insn (this_insn);
22962 if (this_insn && BARRIER_P (this_insn))
22963 this_insn = next_nonnote_insn (this_insn);
22965 if (this_insn && this_insn == label
22966 && insns_skipped < max_insns_skipped)
22968 arm_ccfsm_state = 1;
22969 succeed = TRUE;
22971 else
22972 fail = TRUE;
22973 break;
22975 case JUMP_INSN:
22976 /* If this is an unconditional branch to the same label, succeed.
22977 If it is to another label, do nothing. If it is conditional,
22978 fail. */
22979 /* XXX Probably, the tests for SET and the PC are
22980 unnecessary. */
22982 scanbody = PATTERN (this_insn);
22983 if (GET_CODE (scanbody) == SET
22984 && GET_CODE (SET_DEST (scanbody)) == PC)
22986 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22987 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22989 arm_ccfsm_state = 2;
22990 succeed = TRUE;
22992 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22993 fail = TRUE;
22995 /* Fail if a conditional return is undesirable (e.g. on a
22996 StrongARM), but still allow this if optimizing for size. */
22997 else if (GET_CODE (scanbody) == return_code
22998 && !use_return_insn (TRUE, NULL)
22999 && !optimize_size)
23000 fail = TRUE;
23001 else if (GET_CODE (scanbody) == return_code)
23003 arm_ccfsm_state = 2;
23004 succeed = TRUE;
23006 else if (GET_CODE (scanbody) == PARALLEL)
23008 switch (get_attr_conds (this_insn))
23010 case CONDS_NOCOND:
23011 break;
23012 default:
23013 fail = TRUE;
23014 break;
23017 else
23018 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23020 break;
23022 case INSN:
23023 /* Instructions using or affecting the condition codes make it
23024 fail. */
23025 scanbody = PATTERN (this_insn);
23026 if (!(GET_CODE (scanbody) == SET
23027 || GET_CODE (scanbody) == PARALLEL)
23028 || get_attr_conds (this_insn) != CONDS_NOCOND)
23029 fail = TRUE;
23030 break;
23032 default:
23033 break;
23036 if (succeed)
23038 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23039 arm_target_label = CODE_LABEL_NUMBER (label);
23040 else
23042 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23044 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23046 this_insn = next_nonnote_insn (this_insn);
23047 gcc_assert (!this_insn
23048 || (!BARRIER_P (this_insn)
23049 && !LABEL_P (this_insn)));
23051 if (!this_insn)
23053 /* Oh, dear! We ran off the end. Give up. */
23054 extract_constrain_insn_cached (insn);
23055 arm_ccfsm_state = 0;
23056 arm_target_insn = NULL;
23057 return;
23059 arm_target_insn = this_insn;
23062 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23063 what it was. */
23064 if (!reverse)
23065 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23067 if (reverse || then_not_else)
23068 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23071 /* Restore recog_data (getting the attributes of other insns can
23072 destroy this array, but final.c assumes that it remains intact
23073 across this call). */
23074 extract_constrain_insn_cached (insn);
23078 /* Output IT instructions. */
23079 void
23080 thumb2_asm_output_opcode (FILE * stream)
23082 char buff[5];
23083 int n;
23085 if (arm_condexec_mask)
23087 for (n = 0; n < arm_condexec_masklen; n++)
23088 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23089 buff[n] = 0;
23090 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23091 arm_condition_codes[arm_current_cc]);
23092 arm_condexec_mask = 0;
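/* A stand-alone sketch (not compiled here) of the IT-block string built
   above: bit n of the mask selects 't' (same condition) or 'e' (inverse
   condition) for the n-th instruction of the block.  */
#if 0
#include <stdio.h>

static void
show_it (unsigned mask, int len, const char *cond)
{
  char buff[5];
  int n;
  for (n = 0; n < len; n++)
    buff[n] = (mask & (1u << n)) ? 't' : 'e';
  buff[n] = 0;
  printf ("i%s\t%s\n", buff, cond);
}

int
main (void)
{
  show_it (0x1, 1, "eq");	/* it    eq */
  show_it (0x3, 2, "eq");	/* itt   eq */
  show_it (0x1, 2, "ne");	/* ite   ne */
  show_it (0x5, 3, "gt");	/* itet  gt */
  return 0;
}
#endif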
23096 /* Returns true if REGNO is a valid register
23097 for holding a quantity of type MODE. */
23099 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23101 if (GET_MODE_CLASS (mode) == MODE_CC)
23102 return (regno == CC_REGNUM
23103 || (TARGET_HARD_FLOAT && TARGET_VFP
23104 && regno == VFPCC_REGNUM));
23106 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23107 return false;
23109 if (TARGET_THUMB1)
23110 /* For the Thumb we only allow values bigger than SImode in
23111 registers 0 - 6, so that there is always a second low
23112 register available to hold the upper part of the value.
23113 We probably ought to ensure that the register is the
23114 start of an even numbered register pair. */
23115 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23117 if (TARGET_HARD_FLOAT && TARGET_VFP
23118 && IS_VFP_REGNUM (regno))
23120 if (mode == SFmode || mode == SImode)
23121 return VFP_REGNO_OK_FOR_SINGLE (regno);
23123 if (mode == DFmode)
23124 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23126 /* VFP registers can hold HFmode values, but there is no point in
23127 putting them there unless we have hardware conversion insns. */
23128 if (mode == HFmode)
23129 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23131 if (TARGET_NEON)
23132 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23133 || (VALID_NEON_QREG_MODE (mode)
23134 && NEON_REGNO_OK_FOR_QUAD (regno))
23135 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23136 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23137 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23138 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23139 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23141 return FALSE;
23144 if (TARGET_REALLY_IWMMXT)
23146 if (IS_IWMMXT_GR_REGNUM (regno))
23147 return mode == SImode;
23149 if (IS_IWMMXT_REGNUM (regno))
23150 return VALID_IWMMXT_REG_MODE (mode);
23153 /* We allow almost any value to be stored in the general registers.
23154 Restrict doubleword quantities to even register pairs in ARM state
23155 so that we can use ldrd. Do not allow very large Neon structure
23156 opaque modes in general registers; they would use too many. */
23157 if (regno <= LAST_ARM_REGNUM)
23159 if (ARM_NUM_REGS (mode) > 4)
23160 return FALSE;
23162 if (TARGET_THUMB2)
23163 return TRUE;
23165 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23168 if (regno == FRAME_POINTER_REGNUM
23169 || regno == ARG_POINTER_REGNUM)
23170 /* We only allow integers in the fake hard registers. */
23171 return GET_MODE_CLASS (mode) == MODE_INT;
23173 return FALSE;
23176 /* Implement MODES_TIEABLE_P. */
23178 bool
23179 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23181 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23182 return true;
23184 /* We specifically want to allow elements of "structure" modes to
23185 be tieable to the structure. This more general condition allows
23186 other rarer situations too. */
23187 if (TARGET_NEON
23188 && (VALID_NEON_DREG_MODE (mode1)
23189 || VALID_NEON_QREG_MODE (mode1)
23190 || VALID_NEON_STRUCT_MODE (mode1))
23191 && (VALID_NEON_DREG_MODE (mode2)
23192 || VALID_NEON_QREG_MODE (mode2)
23193 || VALID_NEON_STRUCT_MODE (mode2)))
23194 return true;
23196 return false;
23199 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23200 not used in arm mode. */
23202 enum reg_class
23203 arm_regno_class (int regno)
23205 if (regno == PC_REGNUM)
23206 return NO_REGS;
23208 if (TARGET_THUMB1)
23210 if (regno == STACK_POINTER_REGNUM)
23211 return STACK_REG;
23212 if (regno == CC_REGNUM)
23213 return CC_REG;
23214 if (regno < 8)
23215 return LO_REGS;
23216 return HI_REGS;
23219 if (TARGET_THUMB2 && regno < 8)
23220 return LO_REGS;
23222 if ( regno <= LAST_ARM_REGNUM
23223 || regno == FRAME_POINTER_REGNUM
23224 || regno == ARG_POINTER_REGNUM)
23225 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23227 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23228 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23230 if (IS_VFP_REGNUM (regno))
23232 if (regno <= D7_VFP_REGNUM)
23233 return VFP_D0_D7_REGS;
23234 else if (regno <= LAST_LO_VFP_REGNUM)
23235 return VFP_LO_REGS;
23236 else
23237 return VFP_HI_REGS;
23240 if (IS_IWMMXT_REGNUM (regno))
23241 return IWMMXT_REGS;
23243 if (IS_IWMMXT_GR_REGNUM (regno))
23244 return IWMMXT_GR_REGS;
23246 return NO_REGS;
23249 /* Handle a special case when computing the offset
23250 of an argument from the frame pointer. */
23252 arm_debugger_arg_offset (int value, rtx addr)
23254 rtx_insn *insn;
23256 /* We are only interested if dbxout_parms() failed to compute the offset. */
23257 if (value != 0)
23258 return 0;
23260 /* We can only cope with the case where the address is held in a register. */
23261 if (!REG_P (addr))
23262 return 0;
23264 /* If we are using the frame pointer to point at the argument, then
23265 an offset of 0 is correct. */
23266 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23267 return 0;
23269 /* If we are using the stack pointer to point at the
23270 argument, then an offset of 0 is correct. */
23271 /* ??? Check this is consistent with thumb2 frame layout. */
23272 if ((TARGET_THUMB || !frame_pointer_needed)
23273 && REGNO (addr) == SP_REGNUM)
23274 return 0;
23276 /* Oh dear. The argument is pointed to by a register rather
23277 than being held in a register, or being stored at a known
23278 offset from the frame pointer. Since GDB only understands
23279 those two kinds of argument we must translate the address
23280 held in the register into an offset from the frame pointer.
23281 We do this by searching through the insns for the function
23282 looking to see where this register gets its value. If the
23283 register is initialized from the frame pointer plus an offset
23284 then we are in luck and we can continue, otherwise we give up.
23286 This code is exercised by producing debugging information
23287 for a function with arguments like this:
23289 double func (double a, double b, int c, double d) {return d;}
23291 Without this code the stab for parameter 'd' will be set to
23292 an offset of 0 from the frame pointer, rather than 8. */
23294 /* The if() statement says:
23296 If the insn is a normal instruction
23297 and if the insn is setting the value in a register
23298 and if the register being set is the register holding the address of the argument
23299 and if the address is computed by an addition
23300 that involves adding to a register
23301 which is the frame pointer
23302 a constant integer
23304 then... */
23306 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23308 if ( NONJUMP_INSN_P (insn)
23309 && GET_CODE (PATTERN (insn)) == SET
23310 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23311 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23312 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23313 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23314 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23317 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23319 break;
23323 if (value == 0)
23325 debug_rtx (addr);
23326 warning (0, "unable to compute real location of stacked parameter");
23327 value = 8; /* XXX magic hack */
23330 return value;
23333 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23335 static const char *
23336 arm_invalid_parameter_type (const_tree t)
23338 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23339 return N_("function parameters cannot have __fp16 type");
23340 return NULL;
23343 /* Implement TARGET_INVALID_RETURN_TYPE. */
23345 static const char *
23346 arm_invalid_return_type (const_tree t)
23348 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23349 return N_("functions cannot return __fp16 type");
23350 return NULL;
23353 /* Implement TARGET_PROMOTED_TYPE. */
23355 static tree
23356 arm_promoted_type (const_tree t)
23358 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23359 return float_type_node;
23360 return NULL_TREE;
23363 /* Implement TARGET_CONVERT_TO_TYPE.
23364 Specifically, this hook implements the peculiarity of the ARM
23365 half-precision floating-point C semantics that requires conversions between
23366 __fp16 to or from double to do an intermediate conversion to float. */
23368 static tree
23369 arm_convert_to_type (tree type, tree expr)
23371 tree fromtype = TREE_TYPE (expr);
23372 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23373 return NULL_TREE;
23374 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23375 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23376 return convert (type, convert (float_type_node, expr));
23377 return NULL_TREE;
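/* A stand-alone sketch (not compiled here; requires an ARM target with
   __fp16 enabled, e.g. -mfp16-format=ieee) of the rewrites this hook
   requests for conversions between __fp16 and double.  */
#if 0
double
widen (__fp16 h)
{
  /* Converted as (double) (float) h rather than directly.  */
  return h;
}

__fp16
narrow (double d)
{
  /* Converted as (__fp16) (float) d rather than directly.  */
  return (__fp16) d;
}
#endif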
23380 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23381 This simply adds HFmode as a supported mode; even though we don't
23382 implement arithmetic on this type directly, it's supported by
23383 optabs conversions, much the way the double-word arithmetic is
23384 special-cased in the default hook. */
23386 static bool
23387 arm_scalar_mode_supported_p (machine_mode mode)
23389 if (mode == HFmode)
23390 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23391 else if (ALL_FIXED_POINT_MODE_P (mode))
23392 return true;
23393 else
23394 return default_scalar_mode_supported_p (mode);
23397 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23398 void
23399 neon_reinterpret (rtx dest, rtx src)
23401 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23404 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23405 not to early-clobber SRC registers in the process.
23407 We assume that the operands described by SRC and DEST represent a
23408 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23409 number of components into which the copy has been decomposed. */
23410 void
23411 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23413 unsigned int i;
23415 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23416 || REGNO (operands[0]) < REGNO (operands[1]))
23418 for (i = 0; i < count; i++)
23420 operands[2 * i] = dest[i];
23421 operands[2 * i + 1] = src[i];
23424 else
23426 for (i = 0; i < count; i++)
23428 operands[2 * i] = dest[count - i - 1];
23429 operands[2 * i + 1] = src[count - i - 1];
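/* Illustrative example (hypothetical register assignment): copying the
   pair {d0,d1} into {d1,d2} must move d1->d2 before d0->d1, or the first
   move would clobber the source of the second; the REGNO comparison above
   chooses between the forward and reversed orderings for exactly this
   reason.  */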
23434 /* Split operands into moves from op[1] + op[2] into op[0]. */
23436 void
23437 neon_split_vcombine (rtx operands[3])
23439 unsigned int dest = REGNO (operands[0]);
23440 unsigned int src1 = REGNO (operands[1]);
23441 unsigned int src2 = REGNO (operands[2]);
23442 machine_mode halfmode = GET_MODE (operands[1]);
23443 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23444 rtx destlo, desthi;
23446 if (src1 == dest && src2 == dest + halfregs)
23448 /* No-op move. Can't split to nothing; emit something. */
23449 emit_note (NOTE_INSN_DELETED);
23450 return;
23453 /* Preserve register attributes for variable tracking. */
23454 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23455 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23456 GET_MODE_SIZE (halfmode));
23458 /* Special case of reversed high/low parts. Use VSWP. */
23459 if (src2 == dest && src1 == dest + halfregs)
23461 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23462 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23463 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23464 return;
23467 if (!reg_overlap_mentioned_p (operands[2], destlo))
23469 /* Try to avoid unnecessary moves if part of the result
23470 is in the right place already. */
23471 if (src1 != dest)
23472 emit_move_insn (destlo, operands[1]);
23473 if (src2 != dest + halfregs)
23474 emit_move_insn (desthi, operands[2]);
23476 else
23478 if (src2 != dest + halfregs)
23479 emit_move_insn (desthi, operands[2]);
23480 if (src1 != dest)
23481 emit_move_insn (destlo, operands[1]);
23485 /* Return the number (counting from 0) of
23486 the least significant set bit in MASK. */
23488 inline static int
23489 number_of_first_bit_set (unsigned mask)
23491 return ctz_hwi (mask);
23494 /* Like emit_multi_reg_push, but allowing for a different set of
23495 registers to be described as saved. MASK is the set of registers
23496 to be saved; REAL_REGS is the set of registers to be described as
23497 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23499 static rtx_insn *
23500 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23502 unsigned long regno;
23503 rtx par[10], tmp, reg;
23504 rtx_insn *insn;
23505 int i, j;
23507 /* Build the parallel of the registers actually being stored. */
23508 for (i = 0; mask; ++i, mask &= mask - 1)
23510 regno = ctz_hwi (mask);
23511 reg = gen_rtx_REG (SImode, regno);
23513 if (i == 0)
23514 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23515 else
23516 tmp = gen_rtx_USE (VOIDmode, reg);
23518 par[i] = tmp;
23521 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23522 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23523 tmp = gen_frame_mem (BLKmode, tmp);
23524 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23525 par[0] = tmp;
23527 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23528 insn = emit_insn (tmp);
23530 /* Always build the stack adjustment note for unwind info. */
23531 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23532 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23533 par[0] = tmp;
23535 /* Build the parallel of the registers recorded as saved for unwind. */
23536 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23538 regno = ctz_hwi (real_regs);
23539 reg = gen_rtx_REG (SImode, regno);
23541 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23542 tmp = gen_frame_mem (SImode, tmp);
23543 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23544 RTX_FRAME_RELATED_P (tmp) = 1;
23545 par[j + 1] = tmp;
23548 if (j == 0)
23549 tmp = par[0];
23550 else
23552 RTX_FRAME_RELATED_P (par[0]) = 1;
23553 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23556 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23558 return insn;
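/* For instance (a hypothetical MASK): with MASK covering {r4, r5, lr}
   this emits a single "push {r4, r5, lr}" that drops SP by 12 bytes,
   while the REG_FRAME_RELATED_EXPR note built from REAL_REGS tells the
   unwinder about the SP adjustment and the individual register stores.  */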
23561 /* Emit code to push or pop registers to or from the stack. F is the
23562 assembly file. MASK is the registers to pop. */
23563 static void
23564 thumb_pop (FILE *f, unsigned long mask)
23566 int regno;
23567 int lo_mask = mask & 0xFF;
23568 int pushed_words = 0;
23570 gcc_assert (mask);
23572 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23574 /* Special case. Do not generate a POP PC statement here, do it in
23575 thumb_exit() */
23576 thumb_exit (f, -1);
23577 return;
23580 fprintf (f, "\tpop\t{");
23582 /* Look at the low registers first. */
23583 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23585 if (lo_mask & 1)
23587 asm_fprintf (f, "%r", regno);
23589 if ((lo_mask & ~1) != 0)
23590 fprintf (f, ", ");
23592 pushed_words++;
23596 if (mask & (1 << PC_REGNUM))
23598 /* Catch popping the PC. */
23599 if (TARGET_INTERWORK || TARGET_BACKTRACE
23600 || crtl->calls_eh_return)
23602 /* The PC is never popped directly; instead
23603 it is popped into r3 and then BX is used. */
23604 fprintf (f, "}\n");
23606 thumb_exit (f, -1);
23608 return;
23610 else
23612 if (mask & 0xFF)
23613 fprintf (f, ", ");
23615 asm_fprintf (f, "%r", PC_REGNUM);
23619 fprintf (f, "}\n");
23622 /* Generate code to return from a thumb function.
23623 If 'reg_containing_return_addr' is -1, then the return address is
23624 actually on the stack, at the stack pointer. */
23625 static void
23626 thumb_exit (FILE *f, int reg_containing_return_addr)
23628 unsigned regs_available_for_popping;
23629 unsigned regs_to_pop;
23630 int pops_needed;
23631 unsigned available;
23632 unsigned required;
23633 machine_mode mode;
23634 int size;
23635 int restore_a4 = FALSE;
23637 /* Compute the registers we need to pop. */
23638 regs_to_pop = 0;
23639 pops_needed = 0;
23641 if (reg_containing_return_addr == -1)
23643 regs_to_pop |= 1 << LR_REGNUM;
23644 ++pops_needed;
23647 if (TARGET_BACKTRACE)
23649 /* Restore the (ARM) frame pointer and stack pointer. */
23650 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23651 pops_needed += 2;
23654 /* If there is nothing to pop then just emit the BX instruction and
23655 return. */
23656 if (pops_needed == 0)
23658 if (crtl->calls_eh_return)
23659 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23661 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23662 return;
23664 /* Otherwise if we are not supporting interworking and we have not created
23665 a backtrace structure and the function was not entered in ARM mode then
23666 just pop the return address straight into the PC. */
23667 else if (!TARGET_INTERWORK
23668 && !TARGET_BACKTRACE
23669 && !is_called_in_ARM_mode (current_function_decl)
23670 && !crtl->calls_eh_return)
23672 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23673 return;
23676 /* Find out how many of the (return) argument registers we can corrupt. */
23677 regs_available_for_popping = 0;
23679 /* If returning via __builtin_eh_return, the bottom three registers
23680 all contain information needed for the return. */
23681 if (crtl->calls_eh_return)
23682 size = 12;
23683 else
23685 /* We can deduce the registers used from the function's
23686 return value. This is more reliable than examining
23687 df_regs_ever_live_p () because that will be set if the register is
23688 ever used in the function, not just if the register is used
23689 to hold a return value. */
23691 if (crtl->return_rtx != 0)
23692 mode = GET_MODE (crtl->return_rtx);
23693 else
23694 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23696 size = GET_MODE_SIZE (mode);
23698 if (size == 0)
23700 /* In a void function we can use any argument register.
23701 In a function that returns a structure on the stack
23702 we can use the second and third argument registers. */
23703 if (mode == VOIDmode)
23704 regs_available_for_popping =
23705 (1 << ARG_REGISTER (1))
23706 | (1 << ARG_REGISTER (2))
23707 | (1 << ARG_REGISTER (3));
23708 else
23709 regs_available_for_popping =
23710 (1 << ARG_REGISTER (2))
23711 | (1 << ARG_REGISTER (3));
23713 else if (size <= 4)
23714 regs_available_for_popping =
23715 (1 << ARG_REGISTER (2))
23716 | (1 << ARG_REGISTER (3));
23717 else if (size <= 8)
23718 regs_available_for_popping =
23719 (1 << ARG_REGISTER (3));
23722 /* Match registers to be popped with registers into which we pop them. */
23723 for (available = regs_available_for_popping,
23724 required = regs_to_pop;
23725 required != 0 && available != 0;
23726 available &= ~(available & - available),
23727 required &= ~(required & - required))
23728 -- pops_needed;
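/* Note: each iteration of the loop above clears the lowest set bit of
   both AVAILABLE and REQUIRED, so POPS_NEEDED is reduced by the number of
   to-be-popped registers that could be paired with a free register.  */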
23730 /* If we have any popping registers left over, remove them. */
23731 if (available > 0)
23732 regs_available_for_popping &= ~available;
23734 /* Otherwise if we need another popping register we can use
23735 the fourth argument register. */
23736 else if (pops_needed)
23738 /* If we have not found any free argument registers and
23739 reg a4 contains the return address, we must move it. */
23740 if (regs_available_for_popping == 0
23741 && reg_containing_return_addr == LAST_ARG_REGNUM)
23743 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23744 reg_containing_return_addr = LR_REGNUM;
23746 else if (size > 12)
23748 /* Register a4 is being used to hold part of the return value,
23749 but we have dire need of a free, low register. */
23750 restore_a4 = TRUE;
23752 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23755 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23757 /* The fourth argument register is available. */
23758 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23760 --pops_needed;
23764 /* Pop as many registers as we can. */
23765 thumb_pop (f, regs_available_for_popping);
23767 /* Process the registers we popped. */
23768 if (reg_containing_return_addr == -1)
23770 /* The return address was popped into the lowest numbered register. */
23771 regs_to_pop &= ~(1 << LR_REGNUM);
23773 reg_containing_return_addr =
23774 number_of_first_bit_set (regs_available_for_popping);
23776 /* Remove this register from the mask of available registers, so that
23777 the return address will not be corrupted by further pops. */
23778 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23781 /* If we popped other registers then handle them here. */
23782 if (regs_available_for_popping)
23784 int frame_pointer;
23786 /* Work out which register currently contains the frame pointer. */
23787 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23789 /* Move it into the correct place. */
23790 asm_fprintf (f, "\tmov\t%r, %r\n",
23791 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23793 /* (Temporarily) remove it from the mask of popped registers. */
23794 regs_available_for_popping &= ~(1 << frame_pointer);
23795 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23797 if (regs_available_for_popping)
23799 int stack_pointer;
23801 /* We popped the stack pointer as well,
23802 find the register that contains it. */
23803 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23805 /* Move it into the stack register. */
23806 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23808 /* At this point we have popped all necessary registers, so
23809 do not worry about restoring regs_available_for_popping
23810 to its correct value:
23812 assert (pops_needed == 0)
23813 assert (regs_available_for_popping == (1 << frame_pointer))
23814 assert (regs_to_pop == (1 << STACK_POINTER)) */
23816 else
23818 /* Since we have just moved the popped value into the frame
23819 pointer, the popping register is available for reuse, and
23820 we know that we still have the stack pointer left to pop. */
23821 regs_available_for_popping |= (1 << frame_pointer);
23825 /* If we still have registers left on the stack, but we no longer have
23826 any registers into which we can pop them, then we must move the return
23827 address into the link register and make available the register that
23828 contained it. */
23829 if (regs_available_for_popping == 0 && pops_needed > 0)
23831 regs_available_for_popping |= 1 << reg_containing_return_addr;
23833 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23834 reg_containing_return_addr);
23836 reg_containing_return_addr = LR_REGNUM;
23839 /* If we have registers left on the stack then pop some more.
23840 We know that at most we will want to pop FP and SP. */
23841 if (pops_needed > 0)
23843 int popped_into;
23844 int move_to;
23846 thumb_pop (f, regs_available_for_popping);
23848 /* We have popped either FP or SP.
23849 Move whichever one it is into the correct register. */
23850 popped_into = number_of_first_bit_set (regs_available_for_popping);
23851 move_to = number_of_first_bit_set (regs_to_pop);
23853 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23855 regs_to_pop &= ~(1 << move_to);
23857 --pops_needed;
23860 /* If we still have not popped everything then we must have only
23861 had one register available to us and we are now popping the SP. */
23862 if (pops_needed > 0)
23864 int popped_into;
23866 thumb_pop (f, regs_available_for_popping);
23868 popped_into = number_of_first_bit_set (regs_available_for_popping);
23870 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23872 /* assert (regs_to_pop == (1 << STACK_POINTER))
23873 assert (pops_needed == 1) */
23877 /* If necessary restore the a4 register. */
23878 if (restore_a4)
23880 if (reg_containing_return_addr != LR_REGNUM)
23882 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23883 reg_containing_return_addr = LR_REGNUM;
23886 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23889 if (crtl->calls_eh_return)
23890 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23892 /* Return to caller. */
23893 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23896 /* Scan INSN just before assembler is output for it.
23897 For Thumb-1, we track the status of the condition codes; this
23898 information is used in the cbranchsi4_insn pattern. */
23899 void
23900 thumb1_final_prescan_insn (rtx_insn *insn)
23902 if (flag_print_asm_name)
23903 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23904 INSN_ADDRESSES (INSN_UID (insn)));
23905 /* Don't overwrite the previous setter when we get to a cbranch. */
23906 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23908 enum attr_conds conds;
23910 if (cfun->machine->thumb1_cc_insn)
23912 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23913 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23914 CC_STATUS_INIT;
23916 conds = get_attr_conds (insn);
23917 if (conds == CONDS_SET)
23919 rtx set = single_set (insn);
23920 cfun->machine->thumb1_cc_insn = insn;
23921 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23922 cfun->machine->thumb1_cc_op1 = const0_rtx;
23923 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23924 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23926 rtx src1 = XEXP (SET_SRC (set), 1);
23927 if (src1 == const0_rtx)
23928 cfun->machine->thumb1_cc_mode = CCmode;
23930 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23932 /* Record the src register operand instead of dest because
23933 cprop_hardreg pass propagates src. */
23934 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23937 else if (conds != CONDS_NOCOND)
23938 cfun->machine->thumb1_cc_insn = NULL_RTX;
23941 /* Check if unexpected far jump is used. */
23942 if (cfun->machine->lr_save_eliminated
23943 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23944 internal_error("Unexpected thumb1 far jump");
23948 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23950 unsigned HOST_WIDE_INT mask = 0xff;
23951 int i;
23953 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23954 if (val == 0) /* XXX */
23955 return 0;
23957 for (i = 0; i < 25; i++)
23958 if ((val & (mask << i)) == val)
23959 return 1;
23961 return 0;
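/* In other words, the constant is accepted when all of its set bits fit
   within some run of 8 contiguous bits: for example 0x00ff0000
   (0xff << 16) is accepted, while 0x101 is rejected because its set bits
   span nine bit positions.  */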
23964 /* Returns nonzero if the current function contains,
23965 or might contain a far jump. */
23966 static int
23967 thumb_far_jump_used_p (void)
23969 rtx_insn *insn;
23970 bool far_jump = false;
23971 unsigned int func_size = 0;
23973 /* This test is only important for leaf functions. */
23974 /* assert (!leaf_function_p ()); */
23976 /* If we have already decided that far jumps may be used,
23977 do not bother checking again, and always return true even if
23978 it turns out that they are not being used. Once we have made
23979 the decision that far jumps are present (and that hence the link
23980 register will be pushed onto the stack) we cannot go back on it. */
23981 if (cfun->machine->far_jump_used)
23982 return 1;
23984 /* If this function is not being called from the prologue/epilogue
23985 generation code then it must be being called from the
23986 INITIAL_ELIMINATION_OFFSET macro. */
23987 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23989 /* In this case we know that we are being asked about the elimination
23990 of the arg pointer register. If that register is not being used,
23991 then there are no arguments on the stack, and we do not have to
23992 worry that a far jump might force the prologue to push the link
23993 register, changing the stack offsets. In this case we can just
23994 return false, since the presence of far jumps in the function will
23995 not affect stack offsets.
23997 If the arg pointer is live (or if it was live, but has now been
23998 eliminated and so set to dead) then we do have to test to see if
23999 the function might contain a far jump. This test can lead to some
24000 false negatives, since before reload is completed, the length of
24001 branch instructions is not known, so gcc defaults to returning their
24002 longest length, which in turn sets the far jump attribute to true.
24004 A false negative will not result in bad code being generated, but it
24005 will result in a needless push and pop of the link register. We
24006 hope that this does not occur too often.
24008 If we need doubleword stack alignment this could affect the other
24009 elimination offsets so we can't risk getting it wrong. */
24010 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24011 cfun->machine->arg_pointer_live = 1;
24012 else if (!cfun->machine->arg_pointer_live)
24013 return 0;
24016 /* We should not change far_jump_used during or after reload, as there is
24017 no chance to change stack frame layout. */
24018 if (reload_in_progress || reload_completed)
24019 return 0;
24021 /* Check to see if the function contains a branch
24022 insn with the far jump attribute set. */
24023 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24025 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24027 far_jump = true;
24029 func_size += get_attr_length (insn);
24032 /* The far_jump attribute will always be true for thumb1 before the
24033 shorten_branch pass, so checking the far_jump attribute before
24034 shorten_branch isn't very useful.
24036 The following heuristic tries to estimate more accurately whether a far
24037 jump will actually be used. The heuristic is very conservative, as there
24038 is no chance to roll back a decision not to use far jumps.
24040 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24041 that each 2-byte insn is associated with a 4-byte constant pool entry.
24042 Using a function size of 2048/3 as the threshold is conservative enough. */
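/* Worked example (illustrative figures only): the threshold corresponds to
   roughly 682 bytes of insns. A function whose insns total 700 bytes gives
   700 * 3 = 2100 >= 2048 and is assumed to need far jumps, while a
   600-byte function gives 1800 < 2048 and is not.  */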
24043 if (far_jump)
24045 if ((func_size * 3) >= 2048)
24047 /* Record the fact that we have decided that
24048 the function does use far jumps. */
24049 cfun->machine->far_jump_used = 1;
24050 return 1;
24054 return 0;
24057 /* Return nonzero if FUNC must be entered in ARM mode. */
24059 is_called_in_ARM_mode (tree func)
24061 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24063 /* Ignore the problem about functions whose address is taken. */
24064 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24065 return TRUE;
24067 #ifdef ARM_PE
24068 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24069 #else
24070 return FALSE;
24071 #endif
24074 /* Given the stack offsets and register mask in OFFSETS, decide how
24075 many additional registers to push instead of subtracting a constant
24076 from SP. For epilogues the principle is the same except we use pop.
24077 FOR_PROLOGUE indicates which we're generating. */
24078 static int
24079 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24081 HOST_WIDE_INT amount;
24082 unsigned long live_regs_mask = offsets->saved_regs_mask;
24083 /* Extract a mask of the ones we can give to the Thumb's push/pop
24084 instruction. */
24085 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24086 /* Then count how many other high registers will need to be pushed. */
24087 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24088 int n_free, reg_base, size;
24090 if (!for_prologue && frame_pointer_needed)
24091 amount = offsets->locals_base - offsets->saved_regs;
24092 else
24093 amount = offsets->outgoing_args - offsets->saved_regs;
24095 /* If the stack frame size is 512 exactly, we can save one load
24096 instruction, which should make this a win even when optimizing
24097 for speed. */
24098 if (!optimize_size && amount != 512)
24099 return 0;
24101 /* Can't do this if there are high registers to push. */
24102 if (high_regs_pushed != 0)
24103 return 0;
24105 /* Shouldn't do it in the prologue if no registers would normally
24106 be pushed at all. In the epilogue, also allow it if we'll have
24107 a pop insn for the PC. */
24108 if (l_mask == 0
24109 && (for_prologue
24110 || TARGET_BACKTRACE
24111 || (live_regs_mask & 1 << LR_REGNUM) == 0
24112 || TARGET_INTERWORK
24113 || crtl->args.pretend_args_size != 0))
24114 return 0;
24116 /* Don't do this if thumb_expand_prologue wants to emit instructions
24117 between the push and the stack frame allocation. */
24118 if (for_prologue
24119 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24120 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24121 return 0;
24123 reg_base = 0;
24124 n_free = 0;
24125 if (!for_prologue)
24127 size = arm_size_return_regs ();
24128 reg_base = ARM_NUM_INTS (size);
24129 live_regs_mask >>= reg_base;
24132 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24133 && (for_prologue || call_used_regs[reg_base + n_free]))
24135 live_regs_mask >>= 1;
24136 n_free++;
24139 if (n_free == 0)
24140 return 0;
24141 gcc_assert (amount / 4 * 4 == amount);
24143 if (amount >= 512 && (amount - n_free * 4) < 512)
24144 return (amount - 508) / 4;
24145 if (amount <= n_free * 4)
24146 return amount / 4;
24147 return 0;
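/* Worked example (hypothetical numbers): if AMOUNT is 520 and three low
   registers are free, then 520 - 3 * 4 = 508 < 512, so (520 - 508) / 4 = 3
   extra registers are pushed/popped and only 508 bytes are left for the SP
   adjustment, which is below the 512 limit tested above.  */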
24150 /* The bits which aren't usefully expanded as rtl. */
24151 const char *
24152 thumb1_unexpanded_epilogue (void)
24154 arm_stack_offsets *offsets;
24155 int regno;
24156 unsigned long live_regs_mask = 0;
24157 int high_regs_pushed = 0;
24158 int extra_pop;
24159 int had_to_push_lr;
24160 int size;
24162 if (cfun->machine->return_used_this_function != 0)
24163 return "";
24165 if (IS_NAKED (arm_current_func_type ()))
24166 return "";
24168 offsets = arm_get_frame_offsets ();
24169 live_regs_mask = offsets->saved_regs_mask;
24170 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24172 /* We can deduce the registers used from the function's return value.
24173 This is more reliable than examining df_regs_ever_live_p () because that
24174 will be set if the register is ever used in the function, not just if
24175 the register is used to hold a return value. */
24176 size = arm_size_return_regs ();
24178 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24179 if (extra_pop > 0)
24181 unsigned long extra_mask = (1 << extra_pop) - 1;
24182 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24185 /* The prolog may have pushed some high registers to use as
24186 work registers. e.g. the testsuite file:
24187 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24188 compiles to produce:
24189 push {r4, r5, r6, r7, lr}
24190 mov r7, r9
24191 mov r6, r8
24192 push {r6, r7}
24193 as part of the prolog. We have to undo that pushing here. */
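/* For the example above, the code below would undo that with something
   like (the exact low registers depend on the return-value size):
       pop  {r2, r3}
       mov  r8, r2
       mov  r9, r3
   before the normal pop of the low registers and the return.  */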
24195 if (high_regs_pushed)
24197 unsigned long mask = live_regs_mask & 0xff;
24198 int next_hi_reg;
24200 /* The available low registers depend on the size of the value we are
24201 returning. */
24202 if (size <= 12)
24203 mask |= 1 << 3;
24204 if (size <= 8)
24205 mask |= 1 << 2;
24207 if (mask == 0)
24208 /* Oh dear! We have no low registers into which we can pop
24209 high registers! */
24210 internal_error
24211 ("no low registers available for popping high registers");
24213 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24214 if (live_regs_mask & (1 << next_hi_reg))
24215 break;
24217 while (high_regs_pushed)
24219 /* Find lo register(s) into which the high register(s) can
24220 be popped. */
24221 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24223 if (mask & (1 << regno))
24224 high_regs_pushed--;
24225 if (high_regs_pushed == 0)
24226 break;
24229 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24231 /* Pop the values into the low register(s). */
24232 thumb_pop (asm_out_file, mask);
24234 /* Move the value(s) into the high registers. */
24235 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24237 if (mask & (1 << regno))
24239 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24240 regno);
24242 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24243 if (live_regs_mask & (1 << next_hi_reg))
24244 break;
24248 live_regs_mask &= ~0x0f00;
24251 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24252 live_regs_mask &= 0xff;
24254 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24256 /* Pop the return address into the PC. */
24257 if (had_to_push_lr)
24258 live_regs_mask |= 1 << PC_REGNUM;
24260 /* Either no argument registers were pushed or a backtrace
24261 structure was created which includes an adjusted stack
24262 pointer, so just pop everything. */
24263 if (live_regs_mask)
24264 thumb_pop (asm_out_file, live_regs_mask);
24266 /* We have either just popped the return address into the
24267 PC or it was kept in LR for the entire function.
24268 Note that thumb_pop has already called thumb_exit if the
24269 PC was in the list. */
24270 if (!had_to_push_lr)
24271 thumb_exit (asm_out_file, LR_REGNUM);
24273 else
24275 /* Pop everything but the return address. */
24276 if (live_regs_mask)
24277 thumb_pop (asm_out_file, live_regs_mask);
24279 if (had_to_push_lr)
24281 if (size > 12)
24283 /* We have no free low regs, so save one. */
24284 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24285 LAST_ARG_REGNUM);
24288 /* Get the return address into a temporary register. */
24289 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24291 if (size > 12)
24293 /* Move the return address to lr. */
24294 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24295 LAST_ARG_REGNUM);
24296 /* Restore the low register. */
24297 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24298 IP_REGNUM);
24299 regno = LR_REGNUM;
24301 else
24302 regno = LAST_ARG_REGNUM;
24304 else
24305 regno = LR_REGNUM;
24307 /* Remove the argument registers that were pushed onto the stack. */
24308 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24309 SP_REGNUM, SP_REGNUM,
24310 crtl->args.pretend_args_size);
24312 thumb_exit (asm_out_file, regno);
24315 return "";
24318 /* Functions to save and restore machine-specific function data. */
24319 static struct machine_function *
24320 arm_init_machine_status (void)
24322 struct machine_function *machine;
24323 machine = ggc_cleared_alloc<machine_function> ();
24325 #if ARM_FT_UNKNOWN != 0
24326 machine->func_type = ARM_FT_UNKNOWN;
24327 #endif
24328 return machine;
24331 /* Return an RTX indicating where the return address to the
24332 calling function can be found. */
24334 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24336 if (count != 0)
24337 return NULL_RTX;
24339 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24342 /* Do anything needed before RTL is emitted for each function. */
24343 void
24344 arm_init_expanders (void)
24346 /* Arrange to initialize and mark the machine per-function status. */
24347 init_machine_status = arm_init_machine_status;
24349 /* This is to stop the combine pass optimizing away the alignment
24350 adjustment of va_arg. */
24351 /* ??? It is claimed that this should not be necessary. */
24352 if (cfun)
24353 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24357 /* Like arm_compute_initial_elimination_offset. Simpler because there
24358 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24359 to point at the base of the local variables after static stack
24360 space for a function has been allocated. */
24362 HOST_WIDE_INT
24363 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24365 arm_stack_offsets *offsets;
24367 offsets = arm_get_frame_offsets ();
24369 switch (from)
24371 case ARG_POINTER_REGNUM:
24372 switch (to)
24374 case STACK_POINTER_REGNUM:
24375 return offsets->outgoing_args - offsets->saved_args;
24377 case FRAME_POINTER_REGNUM:
24378 return offsets->soft_frame - offsets->saved_args;
24380 case ARM_HARD_FRAME_POINTER_REGNUM:
24381 return offsets->saved_regs - offsets->saved_args;
24383 case THUMB_HARD_FRAME_POINTER_REGNUM:
24384 return offsets->locals_base - offsets->saved_args;
24386 default:
24387 gcc_unreachable ();
24389 break;
24391 case FRAME_POINTER_REGNUM:
24392 switch (to)
24394 case STACK_POINTER_REGNUM:
24395 return offsets->outgoing_args - offsets->soft_frame;
24397 case ARM_HARD_FRAME_POINTER_REGNUM:
24398 return offsets->saved_regs - offsets->soft_frame;
24400 case THUMB_HARD_FRAME_POINTER_REGNUM:
24401 return offsets->locals_base - offsets->soft_frame;
24403 default:
24404 gcc_unreachable ();
24406 break;
24408 default:
24409 gcc_unreachable ();
24413 /* Generate the function's prologue. */
24415 void
24416 thumb1_expand_prologue (void)
24418 rtx_insn *insn;
24420 HOST_WIDE_INT amount;
24421 arm_stack_offsets *offsets;
24422 unsigned long func_type;
24423 int regno;
24424 unsigned long live_regs_mask;
24425 unsigned long l_mask;
24426 unsigned high_regs_pushed = 0;
24428 func_type = arm_current_func_type ();
24430 /* Naked functions don't have prologues. */
24431 if (IS_NAKED (func_type))
24432 return;
24434 if (IS_INTERRUPT (func_type))
24436 error ("interrupt Service Routines cannot be coded in Thumb mode");
24437 return;
24440 if (is_called_in_ARM_mode (current_function_decl))
24441 emit_insn (gen_prologue_thumb1_interwork ());
24443 offsets = arm_get_frame_offsets ();
24444 live_regs_mask = offsets->saved_regs_mask;
24446 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24447 l_mask = live_regs_mask & 0x40ff;
24448 /* Then count how many other high registers will need to be pushed. */
24449 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24451 if (crtl->args.pretend_args_size)
24453 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24455 if (cfun->machine->uses_anonymous_args)
24457 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24458 unsigned long mask;
24460 mask = 1ul << (LAST_ARG_REGNUM + 1);
24461 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24463 insn = thumb1_emit_multi_reg_push (mask, 0);
24465 else
24467 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24468 stack_pointer_rtx, x));
24470 RTX_FRAME_RELATED_P (insn) = 1;
24473 if (TARGET_BACKTRACE)
24475 HOST_WIDE_INT offset = 0;
24476 unsigned work_register;
24477 rtx work_reg, x, arm_hfp_rtx;
24479 /* We have been asked to create a stack backtrace structure.
24480 The code looks like this:
24482 0 .align 2
24483 0 func:
24484 0 sub SP, #16 Reserve space for 4 registers.
24485 2 push {R7} Push low registers.
24486 4 add R7, SP, #20 Get the stack pointer before the push.
24487 6 str R7, [SP, #8] Store the stack pointer
24488 (before reserving the space).
24489 8 mov R7, PC Get hold of the start of this code + 12.
24490 10 str R7, [SP, #16] Store it.
24491 12 mov R7, FP Get hold of the current frame pointer.
24492 14 str R7, [SP, #4] Store it.
24493 16 mov R7, LR Get hold of the current return address.
24494 18 str R7, [SP, #12] Store it.
24495 20 add R7, SP, #16 Point at the start of the
24496 backtrace structure.
24497 22 mov FP, R7 Put this value into the frame pointer. */
24499 work_register = thumb_find_work_register (live_regs_mask);
24500 work_reg = gen_rtx_REG (SImode, work_register);
24501 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24503 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24504 stack_pointer_rtx, GEN_INT (-16)));
24505 RTX_FRAME_RELATED_P (insn) = 1;
24507 if (l_mask)
24509 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24510 RTX_FRAME_RELATED_P (insn) = 1;
24512 offset = bit_count (l_mask) * UNITS_PER_WORD;
24515 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24516 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24518 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24519 x = gen_frame_mem (SImode, x);
24520 emit_move_insn (x, work_reg);
24522 /* Make sure that the instruction fetching the PC is in the right place
24523 to calculate "start of backtrace creation code + 12". */
24524 /* ??? The stores using the common WORK_REG ought to be enough to
24525 prevent the scheduler from doing anything weird. Failing that
24526 we could always move all of the following into an UNSPEC_VOLATILE. */
24527 if (l_mask)
24529 x = gen_rtx_REG (SImode, PC_REGNUM);
24530 emit_move_insn (work_reg, x);
24532 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24533 x = gen_frame_mem (SImode, x);
24534 emit_move_insn (x, work_reg);
24536 emit_move_insn (work_reg, arm_hfp_rtx);
24538 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24539 x = gen_frame_mem (SImode, x);
24540 emit_move_insn (x, work_reg);
24542 else
24544 emit_move_insn (work_reg, arm_hfp_rtx);
24546 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24547 x = gen_frame_mem (SImode, x);
24548 emit_move_insn (x, work_reg);
24550 x = gen_rtx_REG (SImode, PC_REGNUM);
24551 emit_move_insn (work_reg, x);
24553 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24554 x = gen_frame_mem (SImode, x);
24555 emit_move_insn (x, work_reg);
24558 x = gen_rtx_REG (SImode, LR_REGNUM);
24559 emit_move_insn (work_reg, x);
24561 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24562 x = gen_frame_mem (SImode, x);
24563 emit_move_insn (x, work_reg);
24565 x = GEN_INT (offset + 12);
24566 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24568 emit_move_insn (arm_hfp_rtx, work_reg);
24570 /* Optimization: If we are not pushing any low registers but we are going
24571 to push some high registers then delay our first push. This will just
24572 be a push of LR and we can combine it with the push of the first high
24573 register. */
24574 else if ((l_mask & 0xff) != 0
24575 || (high_regs_pushed == 0 && l_mask))
24577 unsigned long mask = l_mask;
24578 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24579 insn = thumb1_emit_multi_reg_push (mask, mask);
24580 RTX_FRAME_RELATED_P (insn) = 1;
24583 if (high_regs_pushed)
24585 unsigned pushable_regs;
24586 unsigned next_hi_reg;
24587 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24588 : crtl->args.info.nregs;
24589 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24591 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24592 if (live_regs_mask & (1 << next_hi_reg))
24593 break;
24595 /* Here we need to mask out registers used for passing arguments,
24596 even if they could be pushed, to avoid using them to stash the high
24597 registers; doing so could clobber arguments still needed by the function. */
24598 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24600 if (pushable_regs == 0)
24601 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24603 while (high_regs_pushed > 0)
24605 unsigned long real_regs_mask = 0;
24607 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24609 if (pushable_regs & (1 << regno))
24611 emit_move_insn (gen_rtx_REG (SImode, regno),
24612 gen_rtx_REG (SImode, next_hi_reg));
24614 high_regs_pushed --;
24615 real_regs_mask |= (1 << next_hi_reg);
24617 if (high_regs_pushed)
24619 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24620 next_hi_reg --)
24621 if (live_regs_mask & (1 << next_hi_reg))
24622 break;
24624 else
24626 pushable_regs &= ~((1 << regno) - 1);
24627 break;
24632 /* If we had to find a work register and we have not yet
24633 saved the LR then add it to the list of regs to push. */
24634 if (l_mask == (1 << LR_REGNUM))
24636 pushable_regs |= l_mask;
24637 real_regs_mask |= l_mask;
24638 l_mask = 0;
24641 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24642 RTX_FRAME_RELATED_P (insn) = 1;
24646 /* Load the pic register before setting the frame pointer,
24647 so we can use r7 as a temporary work register. */
24648 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24649 arm_load_pic_register (live_regs_mask);
24651 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24652 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24653 stack_pointer_rtx);
24655 if (flag_stack_usage_info)
24656 current_function_static_stack_size
24657 = offsets->outgoing_args - offsets->saved_args;
24659 amount = offsets->outgoing_args - offsets->saved_regs;
24660 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24661 if (amount)
24663 if (amount < 512)
24665 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24666 GEN_INT (- amount)));
24667 RTX_FRAME_RELATED_P (insn) = 1;
24669 else
24671 rtx reg, dwarf;
24673 /* The stack decrement is too big for an immediate value in a single
24674 insn. In theory we could issue multiple subtracts, but after
24675 three of them it becomes more space efficient to place the full
24676 value in the constant pool and load into a register. (Also the
24677 ARM debugger really likes to see only one stack decrement per
24678 function). So instead we look for a scratch register into which
24679 we can load the decrement, and then we subtract this from the
24680 stack pointer. Unfortunately on the thumb the only available
24681 scratch registers are the argument registers, and we cannot use
24682 these as they may hold arguments to the function. Instead we
24683 attempt to locate a call preserved register which is used by this
24684 function. If we can find one, then we know that it will have
24685 been pushed at the start of the prologue and so we can corrupt
24686 it now. */
24687 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24688 if (live_regs_mask & (1 << regno))
24689 break;
24691 gcc_assert(regno <= LAST_LO_REGNUM);
24693 reg = gen_rtx_REG (SImode, regno);
24695 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24697 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24698 stack_pointer_rtx, reg));
24700 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24701 plus_constant (Pmode, stack_pointer_rtx,
24702 -amount));
24703 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24704 RTX_FRAME_RELATED_P (insn) = 1;
24708 if (frame_pointer_needed)
24709 thumb_set_frame_pointer (offsets);
24711 /* If we are profiling, make sure no instructions are scheduled before
24712 the call to mcount. Similarly if the user has requested no
24713 scheduling in the prolog. Similarly if we want non-call exceptions
24714 using the EABI unwinder, to prevent faulting instructions from being
24715 swapped with a stack adjustment. */
24716 if (crtl->profile || !TARGET_SCHED_PROLOG
24717 || (arm_except_unwind_info (&global_options) == UI_TARGET
24718 && cfun->can_throw_non_call_exceptions))
24719 emit_insn (gen_blockage ());
24721 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24722 if (live_regs_mask & 0xff)
24723 cfun->machine->lr_save_eliminated = 0;
24726 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24727 POP instruction can be generated. LR should be replaced by PC. All
24728 the checks required are already done by USE_RETURN_INSN (). Hence,
24729 all we really need to check here is whether a single register or
24730 multiple registers are to be popped. */
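/* For instance (a sketch of the expected output): popping a single
   register straight into the PC typically assembles to something like
   "ldr pc, [sp], #4", while the multi-register case becomes a
   "pop {..., pc}".  */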
24731 void
24732 thumb2_expand_return (bool simple_return)
24734 int i, num_regs;
24735 unsigned long saved_regs_mask;
24736 arm_stack_offsets *offsets;
24738 offsets = arm_get_frame_offsets ();
24739 saved_regs_mask = offsets->saved_regs_mask;
24741 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24742 if (saved_regs_mask & (1 << i))
24743 num_regs++;
24745 if (!simple_return && saved_regs_mask)
24747 if (num_regs == 1)
24749 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24750 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24751 rtx addr = gen_rtx_MEM (SImode,
24752 gen_rtx_POST_INC (SImode,
24753 stack_pointer_rtx));
24754 set_mem_alias_set (addr, get_frame_alias_set ());
24755 XVECEXP (par, 0, 0) = ret_rtx;
24756 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24757 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24758 emit_jump_insn (par);
24760 else
24762 saved_regs_mask &= ~ (1 << LR_REGNUM);
24763 saved_regs_mask |= (1 << PC_REGNUM);
24764 arm_emit_multi_reg_pop (saved_regs_mask);
24767 else
24769 emit_jump_insn (simple_return_rtx);
24773 void
24774 thumb1_expand_epilogue (void)
24776 HOST_WIDE_INT amount;
24777 arm_stack_offsets *offsets;
24778 int regno;
24780 /* Naked functions don't have epilogues. */
24781 if (IS_NAKED (arm_current_func_type ()))
24782 return;
24784 offsets = arm_get_frame_offsets ();
24785 amount = offsets->outgoing_args - offsets->saved_regs;
24787 if (frame_pointer_needed)
24789 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24790 amount = offsets->locals_base - offsets->saved_regs;
24792 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24794 gcc_assert (amount >= 0);
24795 if (amount)
24797 emit_insn (gen_blockage ());
24799 if (amount < 512)
24800 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24801 GEN_INT (amount)));
24802 else
24804 /* r3 is always free in the epilogue. */
24805 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24807 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24808 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24812 /* Emit a USE (stack_pointer_rtx), so that
24813 the stack adjustment will not be deleted. */
24814 emit_insn (gen_force_register_use (stack_pointer_rtx));
24816 if (crtl->profile || !TARGET_SCHED_PROLOG)
24817 emit_insn (gen_blockage ());
24819 /* Emit a clobber for each insn that will be restored in the epilogue,
24820 so that flow2 will get register lifetimes correct. */
24821 for (regno = 0; regno < 13; regno++)
24822 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24823 emit_clobber (gen_rtx_REG (SImode, regno));
24825 if (! df_regs_ever_live_p (LR_REGNUM))
24826 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24829 /* Epilogue code for APCS frame. */
24830 static void
24831 arm_expand_epilogue_apcs_frame (bool really_return)
24833 unsigned long func_type;
24834 unsigned long saved_regs_mask;
24835 int num_regs = 0;
24836 int i;
24837 int floats_from_frame = 0;
24838 arm_stack_offsets *offsets;
24840 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24841 func_type = arm_current_func_type ();
24843 /* Get frame offsets for ARM. */
24844 offsets = arm_get_frame_offsets ();
24845 saved_regs_mask = offsets->saved_regs_mask;
24847 /* Find the offset of the floating-point save area in the frame. */
24848 floats_from_frame
24849 = (offsets->saved_args
24850 + arm_compute_static_chain_stack_bytes ()
24851 - offsets->frame);
24853 /* Compute how many core registers are saved and how far away the floats are. */
24854 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24855 if (saved_regs_mask & (1 << i))
24857 num_regs++;
24858 floats_from_frame += 4;
24861 if (TARGET_HARD_FLOAT && TARGET_VFP)
24863 int start_reg;
24864 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24866 /* The offset is from IP_REGNUM. */
24867 int saved_size = arm_get_vfp_saved_size ();
24868 if (saved_size > 0)
24870 rtx_insn *insn;
24871 floats_from_frame += saved_size;
24872 insn = emit_insn (gen_addsi3 (ip_rtx,
24873 hard_frame_pointer_rtx,
24874 GEN_INT (-floats_from_frame)));
24875 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24876 ip_rtx, hard_frame_pointer_rtx);
24879 /* Generate VFP register multi-pop. */
24880 start_reg = FIRST_VFP_REGNUM;
24882 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24883 /* Look for a case where a reg does not need restoring. */
24884 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24885 && (!df_regs_ever_live_p (i + 1)
24886 || call_used_regs[i + 1]))
24888 if (start_reg != i)
24889 arm_emit_vfp_multi_reg_pop (start_reg,
24890 (i - start_reg) / 2,
24891 gen_rtx_REG (SImode,
24892 IP_REGNUM));
24893 start_reg = i + 2;
24896 /* Restore the remaining regs that we have discovered (or possibly
24897 even all of them, if the conditional in the for loop never
24898 fired). */
24899 if (start_reg != i)
24900 arm_emit_vfp_multi_reg_pop (start_reg,
24901 (i - start_reg) / 2,
24902 gen_rtx_REG (SImode, IP_REGNUM));
24905 if (TARGET_IWMMXT)
24907 /* The frame pointer is guaranteed to be non-double-word aligned, as
24908 it is set to double-word-aligned old_stack_pointer - 4. */
24909 rtx_insn *insn;
24910 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24912 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24913 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24915 rtx addr = gen_frame_mem (V2SImode,
24916 plus_constant (Pmode, hard_frame_pointer_rtx,
24917 - lrm_count * 4));
24918 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24919 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24920 gen_rtx_REG (V2SImode, i),
24921 NULL_RTX);
24922 lrm_count += 2;
24926 /* saved_regs_mask should contain IP which contains old stack pointer
24927 at the time of activation creation. Since SP and IP are adjacent registers,
24928 we can restore the value directly into SP. */
24929 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24930 saved_regs_mask &= ~(1 << IP_REGNUM);
24931 saved_regs_mask |= (1 << SP_REGNUM);
24933 /* There are two registers left in saved_regs_mask - LR and PC. We
24934 only need to restore LR (the return address), but to
24935 save time we can load it directly into PC, unless we need a
24936 special function exit sequence, or we are not really returning. */
24937 if (really_return
24938 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24939 && !crtl->calls_eh_return)
24940 /* Delete LR from the register mask, so that LR on
24941 the stack is loaded into the PC in the register mask. */
24942 saved_regs_mask &= ~(1 << LR_REGNUM);
24943 else
24944 saved_regs_mask &= ~(1 << PC_REGNUM);
24946 num_regs = bit_count (saved_regs_mask);
24947 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24949 rtx_insn *insn;
24950 emit_insn (gen_blockage ());
24951 /* Unwind the stack to just below the saved registers. */
24952 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24953 hard_frame_pointer_rtx,
24954 GEN_INT (- 4 * num_regs)));
24956 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24957 stack_pointer_rtx, hard_frame_pointer_rtx);
24960 arm_emit_multi_reg_pop (saved_regs_mask);
24962 if (IS_INTERRUPT (func_type))
24964 /* Interrupt handlers will have pushed the
24965 IP onto the stack, so restore it now. */
24966 rtx_insn *insn;
24967 rtx addr = gen_rtx_MEM (SImode,
24968 gen_rtx_POST_INC (SImode,
24969 stack_pointer_rtx));
24970 set_mem_alias_set (addr, get_frame_alias_set ());
24971 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24972 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24973 gen_rtx_REG (SImode, IP_REGNUM),
24974 NULL_RTX);
24977 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24978 return;
24980 if (crtl->calls_eh_return)
24981 emit_insn (gen_addsi3 (stack_pointer_rtx,
24982 stack_pointer_rtx,
24983 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24985 if (IS_STACKALIGN (func_type))
24986 /* Restore the original stack pointer. Before prologue, the stack was
24987 realigned and the original stack pointer saved in r0. For details,
24988 see comment in arm_expand_prologue. */
24989 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24991 emit_jump_insn (simple_return_rtx);
24994 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24995 function is not a sibcall. */
24996 void
24997 arm_expand_epilogue (bool really_return)
24999 unsigned long func_type;
25000 unsigned long saved_regs_mask;
25001 int num_regs = 0;
25002 int i;
25003 int amount;
25004 arm_stack_offsets *offsets;
25006 func_type = arm_current_func_type ();
25008 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25009 and let output_return_instruction take care of any instruction emission. */
25010 if (IS_NAKED (func_type)
25011 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25013 if (really_return)
25014 emit_jump_insn (simple_return_rtx);
25015 return;
25018 /* If we are throwing an exception, then we really must be doing a
25019 return, so we can't tail-call. */
25020 gcc_assert (!crtl->calls_eh_return || really_return);
25022 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25024 arm_expand_epilogue_apcs_frame (really_return);
25025 return;
25028 /* Get frame offsets for ARM. */
25029 offsets = arm_get_frame_offsets ();
25030 saved_regs_mask = offsets->saved_regs_mask;
25031 num_regs = bit_count (saved_regs_mask);
25033 if (frame_pointer_needed)
25035 rtx_insn *insn;
25036 /* Restore stack pointer if necessary. */
25037 if (TARGET_ARM)
25039 /* In ARM mode, frame pointer points to first saved register.
25040 Restore stack pointer to last saved register. */
25041 amount = offsets->frame - offsets->saved_regs;
25043 /* Force out any pending memory operations that reference stacked data
25044 before stack de-allocation occurs. */
25045 emit_insn (gen_blockage ());
25046 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25047 hard_frame_pointer_rtx,
25048 GEN_INT (amount)));
25049 arm_add_cfa_adjust_cfa_note (insn, amount,
25050 stack_pointer_rtx,
25051 hard_frame_pointer_rtx);
25053 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25054 deleted. */
25055 emit_insn (gen_force_register_use (stack_pointer_rtx));
25057 else
25059 /* In Thumb-2 mode, the frame pointer points to the last saved
25060 register. */
25061 amount = offsets->locals_base - offsets->saved_regs;
25062 if (amount)
25064 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25065 hard_frame_pointer_rtx,
25066 GEN_INT (amount)));
25067 arm_add_cfa_adjust_cfa_note (insn, amount,
25068 hard_frame_pointer_rtx,
25069 hard_frame_pointer_rtx);
25072 /* Force out any pending memory operations that reference stacked data
25073 before stack de-allocation occurs. */
25074 emit_insn (gen_blockage ());
25075 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25076 hard_frame_pointer_rtx));
25077 arm_add_cfa_adjust_cfa_note (insn, 0,
25078 stack_pointer_rtx,
25079 hard_frame_pointer_rtx);
25080 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25081 deleted. */
25082 emit_insn (gen_force_register_use (stack_pointer_rtx));
25085 else
25087 /* Pop off outgoing args and local frame to adjust stack pointer to
25088 last saved register. */
25089 amount = offsets->outgoing_args - offsets->saved_regs;
25090 if (amount)
25092 rtx_insn *tmp;
25093 /* Force out any pending memory operations that reference stacked data
25094 before stack de-allocation occurs. */
25095 emit_insn (gen_blockage ());
25096 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25097 stack_pointer_rtx,
25098 GEN_INT (amount)));
25099 arm_add_cfa_adjust_cfa_note (tmp, amount,
25100 stack_pointer_rtx, stack_pointer_rtx);
25101 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25102 not deleted. */
25103 emit_insn (gen_force_register_use (stack_pointer_rtx));
25107 if (TARGET_HARD_FLOAT && TARGET_VFP)
25109 /* Generate VFP register multi-pop. */
25110 int end_reg = LAST_VFP_REGNUM + 1;
25112 /* Scan the registers in reverse order. We need to match
25113 any groupings made in the prologue and generate matching
25114 vldm operations. The need to match groups is because,
25115 unlike pop, vldm can only do consecutive regs. */
25116 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25117 /* Look for a case where a reg does not need restoring. */
25118 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25119 && (!df_regs_ever_live_p (i + 1)
25120 || call_used_regs[i + 1]))
25122 /* Restore the regs discovered so far (from reg+2 to
25123 end_reg). */
25124 if (end_reg > i + 2)
25125 arm_emit_vfp_multi_reg_pop (i + 2,
25126 (end_reg - (i + 2)) / 2,
25127 stack_pointer_rtx);
25128 end_reg = i;
25131 /* Restore the remaining regs that we have discovered (or possibly
25132 even all of them, if the conditional in the for loop never
25133 fired). */
25134 if (end_reg > i + 2)
25135 arm_emit_vfp_multi_reg_pop (i + 2,
25136 (end_reg - (i + 2)) / 2,
25137 stack_pointer_rtx);
25140 if (TARGET_IWMMXT)
25141 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25142 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25144 rtx_insn *insn;
25145 rtx addr = gen_rtx_MEM (V2SImode,
25146 gen_rtx_POST_INC (SImode,
25147 stack_pointer_rtx));
25148 set_mem_alias_set (addr, get_frame_alias_set ());
25149 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25150 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25151 gen_rtx_REG (V2SImode, i),
25152 NULL_RTX);
25153 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25154 stack_pointer_rtx, stack_pointer_rtx);
25157 if (saved_regs_mask)
25159 rtx insn;
25160 bool return_in_pc = false;
25162 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25163 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25164 && !IS_STACKALIGN (func_type)
25165 && really_return
25166 && crtl->args.pretend_args_size == 0
25167 && saved_regs_mask & (1 << LR_REGNUM)
25168 && !crtl->calls_eh_return)
25170 saved_regs_mask &= ~(1 << LR_REGNUM);
25171 saved_regs_mask |= (1 << PC_REGNUM);
25172 return_in_pc = true;
25175 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25177 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25178 if (saved_regs_mask & (1 << i))
25180 rtx addr = gen_rtx_MEM (SImode,
25181 gen_rtx_POST_INC (SImode,
25182 stack_pointer_rtx));
25183 set_mem_alias_set (addr, get_frame_alias_set ());
25185 if (i == PC_REGNUM)
25187 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25188 XVECEXP (insn, 0, 0) = ret_rtx;
25189 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25190 gen_rtx_REG (SImode, i),
25191 addr);
25192 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25193 insn = emit_jump_insn (insn);
25195 else
25197 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25198 addr));
25199 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25200 gen_rtx_REG (SImode, i),
25201 NULL_RTX);
25202 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25203 stack_pointer_rtx,
25204 stack_pointer_rtx);
25208 else
25210 if (TARGET_LDRD
25211 && current_tune->prefer_ldrd_strd
25212 && !optimize_function_for_size_p (cfun))
25214 if (TARGET_THUMB2)
25215 thumb2_emit_ldrd_pop (saved_regs_mask);
25216 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25217 arm_emit_ldrd_pop (saved_regs_mask);
25218 else
25219 arm_emit_multi_reg_pop (saved_regs_mask);
25221 else
25222 arm_emit_multi_reg_pop (saved_regs_mask);
25225 if (return_in_pc)
25226 return;
25229 if (crtl->args.pretend_args_size)
25231 int i, j;
25232 rtx dwarf = NULL_RTX;
25233 rtx_insn *tmp =
25234 emit_insn (gen_addsi3 (stack_pointer_rtx,
25235 stack_pointer_rtx,
25236 GEN_INT (crtl->args.pretend_args_size)));
25238 RTX_FRAME_RELATED_P (tmp) = 1;
25240 if (cfun->machine->uses_anonymous_args)
25242 /* Restore pretend args. See arm_expand_prologue for how the
25243 pretend args were saved on the stack. */
25244 int num_regs = crtl->args.pretend_args_size / 4;
25245 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25246 for (j = 0, i = 0; j < num_regs; i++)
25247 if (saved_regs_mask & (1 << i))
25249 rtx reg = gen_rtx_REG (SImode, i);
25250 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25251 j++;
25253 REG_NOTES (tmp) = dwarf;
25255 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25256 stack_pointer_rtx, stack_pointer_rtx);
25259 if (!really_return)
25260 return;
25262 if (crtl->calls_eh_return)
25263 emit_insn (gen_addsi3 (stack_pointer_rtx,
25264 stack_pointer_rtx,
25265 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25267 if (IS_STACKALIGN (func_type))
25268 /* Restore the original stack pointer. Before prologue, the stack was
25269 realigned and the original stack pointer saved in r0. For details,
25270 see comment in arm_expand_prologue. */
25271 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25273 emit_jump_insn (simple_return_rtx);
25276 /* Implementation of insn prologue_thumb1_interwork. This is the first
25277 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25279 const char *
25280 thumb1_output_interwork (void)
25282 const char * name;
25283 FILE *f = asm_out_file;
25285 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25286 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25287 == SYMBOL_REF);
25288 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25290 /* Generate code sequence to switch us into Thumb mode. */
25291 /* The .code 32 directive has already been emitted by
25292 ASM_DECLARE_FUNCTION_NAME. */
25293 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25294 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25296 /* Generate a label, so that the debugger will notice the
25297 change in instruction sets. This label is also used by
25298 the assembler to bypass the ARM code when this function
25299 is called from a Thumb encoded function elsewhere in the
25300 same file. Hence the definition of STUB_NAME here must
25301 agree with the definition in gas/config/tc-arm.c. */
25303 #define STUB_NAME ".real_start_of"
25305 fprintf (f, "\t.code\t16\n");
25306 #ifdef ARM_PE
25307 if (arm_dllexport_name_p (name))
25308 name = arm_strip_name_encoding (name);
25309 #endif
25310 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25311 fprintf (f, "\t.thumb_func\n");
25312 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25314 return "";
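/* Putting the pieces together, the stub emitted above looks roughly
   like this for a function "foo" (illustrative; the label is formed
   from STUB_NAME, the user label prefix and the function name):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	<STUB_NAME><foo>
	.thumb_func
   <STUB_NAME><foo>:  */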
25317 /* Handle the case of a double word load into a low register from
25318 a computed memory address. The computed address may involve a
25319 register which is overwritten by the load. */
25320 const char *
25321 thumb_load_double_from_address (rtx *operands)
25323 rtx addr;
25324 rtx base;
25325 rtx offset;
25326 rtx arg1;
25327 rtx arg2;
25329 gcc_assert (REG_P (operands[0]));
25330 gcc_assert (MEM_P (operands[1]));
25332 /* Get the memory address. */
25333 addr = XEXP (operands[1], 0);
25335 /* Work out how the memory address is computed. */
25336 switch (GET_CODE (addr))
25338 case REG:
25339 operands[2] = adjust_address (operands[1], SImode, 4);
25341 if (REGNO (operands[0]) == REGNO (addr))
25343 output_asm_insn ("ldr\t%H0, %2", operands);
25344 output_asm_insn ("ldr\t%0, %1", operands);
25346 else
25348 output_asm_insn ("ldr\t%0, %1", operands);
25349 output_asm_insn ("ldr\t%H0, %2", operands);
25351 break;
25353 case CONST:
25354 /* Compute <address> + 4 for the high order load. */
25355 operands[2] = adjust_address (operands[1], SImode, 4);
25357 output_asm_insn ("ldr\t%0, %1", operands);
25358 output_asm_insn ("ldr\t%H0, %2", operands);
25359 break;
25361 case PLUS:
25362 arg1 = XEXP (addr, 0);
25363 arg2 = XEXP (addr, 1);
25365 if (CONSTANT_P (arg1))
25366 base = arg2, offset = arg1;
25367 else
25368 base = arg1, offset = arg2;
25370 gcc_assert (REG_P (base));
25372 /* Catch the case of <address> = <reg> + <reg> */
25373 if (REG_P (offset))
25375 int reg_offset = REGNO (offset);
25376 int reg_base = REGNO (base);
25377 int reg_dest = REGNO (operands[0]);
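	  /* Illustrative example of the sequence emitted below, with the
	     destination in r2, the base in r4 and the offset in r5:

		add	r3, r4, r5
		ldr	r2, [r3, #0]
		ldr	r3, [r3, #4]

	     The low word is loaded first; the high destination register
	     keeps the computed address until the final load clobbers it.  */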
25379 /* Add the base and offset registers together into the
25380 higher destination register. */
25381 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25382 reg_dest + 1, reg_base, reg_offset);
25384 /* Load the lower destination register from the address in
25385 the higher destination register. */
25386 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25387 reg_dest, reg_dest + 1);
25389 /* Load the higher destination register from its own address
25390 plus 4. */
25391 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25392 reg_dest + 1, reg_dest + 1);
25394 else
25396 /* Compute <address> + 4 for the high order load. */
25397 operands[2] = adjust_address (operands[1], SImode, 4);
25399 /* If the computed address is held in the low order register
25400 then load the high order register first, otherwise always
25401 load the low order register first. */
25402 if (REGNO (operands[0]) == REGNO (base))
25404 output_asm_insn ("ldr\t%H0, %2", operands);
25405 output_asm_insn ("ldr\t%0, %1", operands);
25407 else
25409 output_asm_insn ("ldr\t%0, %1", operands);
25410 output_asm_insn ("ldr\t%H0, %2", operands);
25413 break;
25415 case LABEL_REF:
25416 /* With no registers to worry about we can just load the value
25417 directly. */
25418 operands[2] = adjust_address (operands[1], SImode, 4);
25420 output_asm_insn ("ldr\t%H0, %2", operands);
25421 output_asm_insn ("ldr\t%0, %1", operands);
25422 break;
25424 default:
25425 gcc_unreachable ();
25428 return "";
25431 const char *
25432 thumb_output_move_mem_multiple (int n, rtx *operands)
25434 rtx tmp;
25436 switch (n)
25438 case 2:
25439 if (REGNO (operands[4]) > REGNO (operands[5]))
25441 tmp = operands[4];
25442 operands[4] = operands[5];
25443 operands[5] = tmp;
25445 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25446 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25447 break;
25449 case 3:
25450 if (REGNO (operands[4]) > REGNO (operands[5]))
25451 std::swap (operands[4], operands[5]);
25452 if (REGNO (operands[5]) > REGNO (operands[6]))
25453 std::swap (operands[5], operands[6]);
25454 if (REGNO (operands[4]) > REGNO (operands[5]))
25455 std::swap (operands[4], operands[5]);
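	  /* The three conditional swaps above form a small sorting network
	     that puts operands[4..6] into ascending register order, as
	     required by the ldmia/stmia register lists emitted below.  */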
25457 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25458 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25459 break;
25461 default:
25462 gcc_unreachable ();
25465 return "";
25468 /* Output a call-via instruction for thumb state. */
25469 const char *
25470 thumb_call_via_reg (rtx reg)
25472 int regno = REGNO (reg);
25473 rtx *labelp;
25475 gcc_assert (regno < LR_REGNUM);
25477 /* If we are in the normal text section we can use a single instance
25478 per compilation unit. If we are doing function sections, then we need
25479 an entry per section, since we can't rely on reachability. */
25480 if (in_section == text_section)
25482 thumb_call_reg_needed = 1;
25484 if (thumb_call_via_label[regno] == NULL)
25485 thumb_call_via_label[regno] = gen_label_rtx ();
25486 labelp = thumb_call_via_label + regno;
25488 else
25490 if (cfun->machine->call_via[regno] == NULL)
25491 cfun->machine->call_via[regno] = gen_label_rtx ();
25492 labelp = cfun->machine->call_via + regno;
25495 output_asm_insn ("bl\t%a0", labelp);
25496 return "";
25499 /* Routines for generating rtl. */
25500 void
25501 thumb_expand_movmemqi (rtx *operands)
25503 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25504 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25505 HOST_WIDE_INT len = INTVAL (operands[2]);
25506 HOST_WIDE_INT offset = 0;
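  /* The copy is expanded greedily from larger to smaller pieces; for
     example (illustrative) a 23-byte copy becomes one 12-byte
     ldmia/stmia block, one 8-byte block, then a halfword and a byte
     tail copy.  */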
25508 while (len >= 12)
25510 emit_insn (gen_movmem12b (out, in, out, in));
25511 len -= 12;
25514 if (len >= 8)
25516 emit_insn (gen_movmem8b (out, in, out, in));
25517 len -= 8;
25520 if (len >= 4)
25522 rtx reg = gen_reg_rtx (SImode);
25523 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25524 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25525 len -= 4;
25526 offset += 4;
25529 if (len >= 2)
25531 rtx reg = gen_reg_rtx (HImode);
25532 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25533 plus_constant (Pmode, in,
25534 offset))));
25535 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25536 offset)),
25537 reg));
25538 len -= 2;
25539 offset += 2;
25542 if (len)
25544 rtx reg = gen_reg_rtx (QImode);
25545 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25546 plus_constant (Pmode, in,
25547 offset))));
25548 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25549 offset)),
25550 reg));
25554 void
25555 thumb_reload_out_hi (rtx *operands)
25557 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25560 /* Handle reading a half-word from memory during reload. */
25561 void
25562 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25564 gcc_unreachable ();
25567 /* Return the length of a function name prefix
25568 that starts with the character C. */
25569 static int
25570 arm_get_strip_length (int c)
25572 switch (c)
25574 ARM_NAME_ENCODING_LENGTHS
25575 default: return 0;
25579 /* Return a pointer to a function's name with any
25580 and all prefix encodings stripped from it. */
25581 const char *
25582 arm_strip_name_encoding (const char *name)
25584 int skip;
25586 while ((skip = arm_get_strip_length (* name)))
25587 name += skip;
25589 return name;
25592 /* If there is a '*' anywhere in the name's prefix, then
25593 emit the stripped name verbatim, otherwise prepend an
25594 underscore if leading underscores are being used. */
25595 void
25596 arm_asm_output_labelref (FILE *stream, const char *name)
25598 int skip;
25599 int verbatim = 0;
25601 while ((skip = arm_get_strip_length (* name)))
25603 verbatim |= (*name == '*');
25604 name += skip;
25607 if (verbatim)
25608 fputs (name, stream);
25609 else
25610 asm_fprintf (stream, "%U%s", name);
25613 /* This function is used to emit an EABI tag and its associated value.
25614 We emit the numerical value of the tag in case the assembler does not
25615 support textual tags (e.g. gas prior to 2.20). If requested we include
25616 the tag name in a comment so that anyone reading the assembler output
25617 will know which tag is being set.
25619 This function is not static because arm-c.c needs it too. */
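/* For illustration, a call such as
     arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2);
   produces
     .eabi_attribute 30, 2	@ Tag_ABI_optimization_goals
   where the trailing comment only appears with -fverbose-asm or -dA
   (assuming "@" is the target's comment-start string).  */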
25621 void
25622 arm_emit_eabi_attribute (const char *name, int num, int val)
25624 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25625 if (flag_verbose_asm || flag_debug_asm)
25626 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25627 asm_fprintf (asm_out_file, "\n");
25630 /* This function is used to print CPU tuning information as a comment
25631 in the assembler file. Pointers are not printed for now. */
25633 void
25634 arm_print_tune_info (void)
25636 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25637 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25638 current_tune->constant_limit);
25639 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25640 current_tune->max_insns_skipped);
25641 asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25642 current_tune->num_prefetch_slots);
25643 asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25644 current_tune->l1_cache_size);
25645 asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25646 current_tune->l1_cache_line_size);
25647 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25648 (int) current_tune->prefer_constant_pool);
25649 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25650 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25651 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25652 current_tune->branch_cost (false, false));
25653 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25654 current_tune->branch_cost (false, true));
25655 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25656 current_tune->branch_cost (true, false));
25657 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25658 current_tune->branch_cost (true, true));
25659 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25660 (int) current_tune->prefer_ldrd_strd);
25661 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25662 (int) current_tune->logical_op_non_short_circuit[0],
25663 (int) current_tune->logical_op_non_short_circuit[1]);
25664 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25665 (int) current_tune->prefer_neon_for_64bits);
25666 asm_fprintf (asm_out_file,
25667 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25668 (int) current_tune->disparage_flag_setting_t16_encodings);
25669 asm_fprintf (asm_out_file,
25670 "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25671 (int) current_tune
25672 ->disparage_partial_flag_setting_t16_encodings);
25673 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25674 (int) current_tune->string_ops_prefer_neon);
25675 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25676 current_tune->max_insns_inline_memset);
25677 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25678 current_tune->fuseable_ops);
25679 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25680 (int) current_tune->sched_autopref);
25683 static void
25684 arm_file_start (void)
25686 int val;
25688 if (TARGET_UNIFIED_ASM)
25689 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25691 if (TARGET_BPABI)
25693 const char *fpu_name;
25694 if (arm_selected_arch)
25696 /* armv7ve doesn't support any extensions. */
25697 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25699 /* Keep backward compatibility for assemblers
25700 which don't support armv7ve. */
25701 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25702 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25703 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25704 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25705 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25707 else
25709 const char* pos = strchr (arm_selected_arch->name, '+');
25710 if (pos)
25712 char buf[15];
25713 gcc_assert (strlen (arm_selected_arch->name)
25714 <= sizeof (buf) / sizeof (*pos));
25715 strncpy (buf, arm_selected_arch->name,
25716 (pos - arm_selected_arch->name) * sizeof (*pos));
25717 buf[pos - arm_selected_arch->name] = '\0';
25718 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25719 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25721 else
25722 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25725 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25726 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25727 else
25729 const char* truncated_name
25730 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25731 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25734 if (print_tune_info)
25735 arm_print_tune_info ();
25737 if (TARGET_SOFT_FLOAT)
25739 fpu_name = "softvfp";
25741 else
25743 fpu_name = arm_fpu_desc->name;
25744 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25746 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25747 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25749 if (TARGET_HARD_FLOAT_ABI)
25750 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25753 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25755 /* Some of these attributes only apply when the corresponding features
25756 are used. However we don't have any easy way of figuring this out.
25757 Conservatively record the setting that would have been used. */
25759 if (flag_rounding_math)
25760 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25762 if (!flag_unsafe_math_optimizations)
25764 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25765 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25767 if (flag_signaling_nans)
25768 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25770 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25771 flag_finite_math_only ? 1 : 3);
25773 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25774 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25775 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25776 flag_short_enums ? 1 : 2);
25778 /* Tag_ABI_optimization_goals. */
25779 if (optimize_size)
25780 val = 4;
25781 else if (optimize >= 2)
25782 val = 2;
25783 else if (optimize)
25784 val = 1;
25785 else
25786 val = 6;
25787 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25789 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25790 unaligned_access);
25792 if (arm_fp16_format)
25793 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25794 (int) arm_fp16_format);
25796 if (arm_lang_output_object_attributes_hook)
25797 arm_lang_output_object_attributes_hook();
25800 default_file_start ();
25803 static void
25804 arm_file_end (void)
25806 int regno;
25808 if (NEED_INDICATE_EXEC_STACK)
25809 /* Add .note.GNU-stack. */
25810 file_end_indicate_exec_stack ();
25812 if (! thumb_call_reg_needed)
25813 return;
25815 switch_to_section (text_section);
25816 asm_fprintf (asm_out_file, "\t.code 16\n");
25817 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25819 for (regno = 0; regno < LR_REGNUM; regno++)
25821 rtx label = thumb_call_via_label[regno];
25823 if (label != 0)
25825 targetm.asm_out.internal_label (asm_out_file, "L",
25826 CODE_LABEL_NUMBER (label));
25827 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25832 #ifndef ARM_PE
25833 /* Symbols in the text segment can be accessed without indirecting via the
25834 constant pool; it may take an extra binary operation, but this is still
25835 faster than indirecting via memory. Don't do this when not optimizing,
25836 since we won't be calculating all of the offsets necessary to do this
25837 simplification. */
25839 static void
25840 arm_encode_section_info (tree decl, rtx rtl, int first)
25842 if (optimize > 0 && TREE_CONSTANT (decl))
25843 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25845 default_encode_section_info (decl, rtl, first);
25847 #endif /* !ARM_PE */
25849 static void
25850 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25852 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25853 && !strcmp (prefix, "L"))
25855 arm_ccfsm_state = 0;
25856 arm_target_insn = NULL;
25858 default_internal_label (stream, prefix, labelno);
25861 /* Output code to add DELTA to the first argument, and then jump
25862 to FUNCTION. Used for C++ multiple inheritance. */
25863 static void
25864 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25865 HOST_WIDE_INT delta,
25866 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25867 tree function)
25869 static int thunk_label = 0;
25870 char label[256];
25871 char labelpc[256];
25872 int mi_delta = delta;
25873 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25874 int shift = 0;
25875 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25876 ? 1 : 0);
25877 if (mi_delta < 0)
25878 mi_delta = - mi_delta;
25880 final_start_function (emit_barrier (), file, 1);
25882 if (TARGET_THUMB1)
25884 int labelno = thunk_label++;
25885 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25886 /* Thunks are entered in ARM mode when available. */
25887 if (TARGET_THUMB1_ONLY)
25889 /* push r3 so we can use it as a temporary. */
25890 /* TODO: Omit this save if r3 is not used. */
25891 fputs ("\tpush {r3}\n", file);
25892 fputs ("\tldr\tr3, ", file);
25894 else
25896 fputs ("\tldr\tr12, ", file);
25898 assemble_name (file, label);
25899 fputc ('\n', file);
25900 if (flag_pic)
25902 /* If we are generating PIC, the ldr instruction below loads
25903 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25904 the address of the add + 8, so we have:
25906 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25907 = target + 1.
25909 Note that we have "+ 1" because some versions of GNU ld
25910 don't set the low bit of the result for R_ARM_REL32
25911 relocations against thumb function symbols.
25912 On ARMv6M this is +4, not +8. */
25913 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25914 assemble_name (file, labelpc);
25915 fputs (":\n", file);
25916 if (TARGET_THUMB1_ONLY)
25918 /* This is 2 insns after the start of the thunk, so we know it
25919 is 4-byte aligned. */
25920 fputs ("\tadd\tr3, pc, r3\n", file);
25921 fputs ("\tmov r12, r3\n", file);
25923 else
25924 fputs ("\tadd\tr12, pc, r12\n", file);
25926 else if (TARGET_THUMB1_ONLY)
25927 fputs ("\tmov r12, r3\n", file);
25929 if (TARGET_THUMB1_ONLY)
25931 if (mi_delta > 255)
25933 fputs ("\tldr\tr3, ", file);
25934 assemble_name (file, label);
25935 fputs ("+4\n", file);
25936 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25937 mi_op, this_regno, this_regno);
25939 else if (mi_delta != 0)
25941 /* Thumb1 unified syntax requires s suffix in instruction name when
25942 one of the operands is immediate. */
25943 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25944 mi_op, this_regno, this_regno,
25945 mi_delta);
25948 else
25950 /* TODO: Use movw/movt for large constants when available. */
25951 while (mi_delta != 0)
25953 if ((mi_delta & (3 << shift)) == 0)
25954 shift += 2;
25955 else
25957 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25958 mi_op, this_regno, this_regno,
25959 mi_delta & (0xff << shift));
25960 mi_delta &= ~(0xff << shift);
25961 shift += 8;
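	  /* Worked example (illustrative), assuming the this pointer is in
	     r0: a delta of 74565 (0x12345) is split into three immediates
	     that each fit the ARM rotated-immediate encoding:
		add	r0, r0, #69	@ 0x45
		add	r0, r0, #8960	@ 0x2300
		add	r0, r0, #65536	@ 0x10000  */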
25965 if (TARGET_THUMB1)
25967 if (TARGET_THUMB1_ONLY)
25968 fputs ("\tpop\t{r3}\n", file);
25970 fprintf (file, "\tbx\tr12\n");
25971 ASM_OUTPUT_ALIGN (file, 2);
25972 assemble_name (file, label);
25973 fputs (":\n", file);
25974 if (flag_pic)
25976 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25977 rtx tem = XEXP (DECL_RTL (function), 0);
25978 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25979 pipeline offset is four rather than eight. Adjust the offset
25980 accordingly. */
25981 tem = plus_constant (GET_MODE (tem), tem,
25982 TARGET_THUMB1_ONLY ? -3 : -7);
25983 tem = gen_rtx_MINUS (GET_MODE (tem),
25984 tem,
25985 gen_rtx_SYMBOL_REF (Pmode,
25986 ggc_strdup (labelpc)));
25987 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25989 else
25990 /* Output ".word .LTHUNKn". */
25991 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25993 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25994 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25996 else
25998 fputs ("\tb\t", file);
25999 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26000 if (NEED_PLT_RELOC)
26001 fputs ("(PLT)", file);
26002 fputc ('\n', file);
26005 final_end_function ();
26008 int
26009 arm_emit_vector_const (FILE *file, rtx x)
26011 int i;
26012 const char * pattern;
26014 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26016 switch (GET_MODE (x))
26018 case V2SImode: pattern = "%08x"; break;
26019 case V4HImode: pattern = "%04x"; break;
26020 case V8QImode: pattern = "%02x"; break;
26021 default: gcc_unreachable ();
26024 fprintf (file, "0x");
26025 for (i = CONST_VECTOR_NUNITS (x); i--;)
26027 rtx element;
26029 element = CONST_VECTOR_ELT (x, i);
26030 fprintf (file, pattern, INTVAL (element));
26033 return 1;
26036 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26037 HFmode constant pool entries are actually loaded with ldr. */
26038 void
26039 arm_emit_fp16_const (rtx c)
26041 REAL_VALUE_TYPE r;
26042 long bits;
26044 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
26045 bits = real_to_target (NULL, &r, HFmode);
26046 if (WORDS_BIG_ENDIAN)
26047 assemble_zeros (2);
26048 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26049 if (!WORDS_BIG_ENDIAN)
26050 assemble_zeros (2);
26053 const char *
26054 arm_output_load_gr (rtx *operands)
26056 rtx reg;
26057 rtx offset;
26058 rtx wcgr;
26059 rtx sum;
26061 if (!MEM_P (operands [1])
26062 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26063 || !REG_P (reg = XEXP (sum, 0))
26064 || !CONST_INT_P (offset = XEXP (sum, 1))
26065 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26066 return "wldrw%?\t%0, %1";
26068 /* Fix up an out-of-range load of a GR register. */
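  /* Illustrative expansion, assuming the destination is wcgr0 and the
     address is r2 + 2048:

	str	r2, [sp, #-4]!	@ Start of GR load expansion
	ldr	r2, [r2, #2048]
	tmcr	wcgr0, r2
	ldr	r2, [sp], #4	@ End of GR load expansion  */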
26069 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26070 wcgr = operands[0];
26071 operands[0] = reg;
26072 output_asm_insn ("ldr%?\t%0, %1", operands);
26074 operands[0] = wcgr;
26075 operands[1] = reg;
26076 output_asm_insn ("tmcr%?\t%0, %1", operands);
26077 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26079 return "";
26082 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26084 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26085 named arg and all anonymous args onto the stack.
26086 XXX I know the prologue shouldn't be pushing registers, but it is faster
26087 that way. */
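/* Illustrative example: for "int f (int a, ...)" under AAPCS the single
   named argument occupies r0, so the code below sets *pretend_size to 12
   and the prologue pushes r1-r3.  */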
26089 static void
26090 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26091 machine_mode mode,
26092 tree type,
26093 int *pretend_size,
26094 int second_time ATTRIBUTE_UNUSED)
26096 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26097 int nregs;
26099 cfun->machine->uses_anonymous_args = 1;
26100 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26102 nregs = pcum->aapcs_ncrn;
26103 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26104 nregs++;
26106 else
26107 nregs = pcum->nregs;
26109 if (nregs < NUM_ARG_REGS)
26110 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26113 /* We can't rely on the caller doing the proper promotion when
26114 using APCS or ATPCS. */
26116 static bool
26117 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26119 return !TARGET_AAPCS_BASED;
26122 static machine_mode
26123 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26124 machine_mode mode,
26125 int *punsignedp ATTRIBUTE_UNUSED,
26126 const_tree fntype ATTRIBUTE_UNUSED,
26127 int for_return ATTRIBUTE_UNUSED)
26129 if (GET_MODE_CLASS (mode) == MODE_INT
26130 && GET_MODE_SIZE (mode) < 4)
26131 return SImode;
26133 return mode;
26136 /* AAPCS based ABIs use short enums by default. */
26138 static bool
26139 arm_default_short_enums (void)
26141 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26145 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26147 static bool
26148 arm_align_anon_bitfield (void)
26150 return TARGET_AAPCS_BASED;
26154 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26156 static tree
26157 arm_cxx_guard_type (void)
26159 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26163 /* The EABI says test the least significant bit of a guard variable. */
26165 static bool
26166 arm_cxx_guard_mask_bit (void)
26168 return TARGET_AAPCS_BASED;
26172 /* The EABI specifies that all array cookies are 8 bytes long. */
26174 static tree
26175 arm_get_cookie_size (tree type)
26177 tree size;
26179 if (!TARGET_AAPCS_BASED)
26180 return default_cxx_get_cookie_size (type);
26182 size = build_int_cst (sizetype, 8);
26183 return size;
26187 /* The EABI says that array cookies should also contain the element size. */
26189 static bool
26190 arm_cookie_has_size (void)
26192 return TARGET_AAPCS_BASED;
26196 /* The EABI says constructors and destructors should return a pointer to
26197 the object constructed/destroyed. */
26199 static bool
26200 arm_cxx_cdtor_returns_this (void)
26202 return TARGET_AAPCS_BASED;
26205 /* The EABI says that an inline function may never be the key
26206 method. */
26208 static bool
26209 arm_cxx_key_method_may_be_inline (void)
26211 return !TARGET_AAPCS_BASED;
26214 static void
26215 arm_cxx_determine_class_data_visibility (tree decl)
26217 if (!TARGET_AAPCS_BASED
26218 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26219 return;
26221 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26222 is exported. However, on systems without dynamic vague linkage,
26223 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26224 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26225 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26226 else
26227 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26228 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26231 static bool
26232 arm_cxx_class_data_always_comdat (void)
26234 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26235 vague linkage if the class has no key function. */
26236 return !TARGET_AAPCS_BASED;
26240 /* The EABI says __aeabi_atexit should be used to register static
26241 destructors. */
26243 static bool
26244 arm_cxx_use_aeabi_atexit (void)
26246 return TARGET_AAPCS_BASED;
26250 void
26251 arm_set_return_address (rtx source, rtx scratch)
26253 arm_stack_offsets *offsets;
26254 HOST_WIDE_INT delta;
26255 rtx addr;
26256 unsigned long saved_regs;
26258 offsets = arm_get_frame_offsets ();
26259 saved_regs = offsets->saved_regs_mask;
26261 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26262 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26263 else
26265 if (frame_pointer_needed)
26266 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26267 else
26269 /* LR will be the first saved register. */
26270 delta = offsets->outgoing_args - (offsets->frame + 4);
26273 if (delta >= 4096)
26275 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26276 GEN_INT (delta & ~4095)));
26277 addr = scratch;
26278 delta &= 4095;
26280 else
26281 addr = stack_pointer_rtx;
26283 addr = plus_constant (Pmode, addr, delta);
26285 /* The store needs to be marked as frame related in order to prevent
26286 DSE from deleting it as dead if it is based on fp. */
26287 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26288 RTX_FRAME_RELATED_P (insn) = 1;
26289 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26294 void
26295 thumb_set_return_address (rtx source, rtx scratch)
26297 arm_stack_offsets *offsets;
26298 HOST_WIDE_INT delta;
26299 HOST_WIDE_INT limit;
26300 int reg;
26301 rtx addr;
26302 unsigned long mask;
26304 emit_use (source);
26306 offsets = arm_get_frame_offsets ();
26307 mask = offsets->saved_regs_mask;
26308 if (mask & (1 << LR_REGNUM))
26310 limit = 1024;
26311 /* Find the saved regs. */
26312 if (frame_pointer_needed)
26314 delta = offsets->soft_frame - offsets->saved_args;
26315 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26316 if (TARGET_THUMB1)
26317 limit = 128;
26319 else
26321 delta = offsets->outgoing_args - offsets->saved_args;
26322 reg = SP_REGNUM;
26324 /* Allow for the stack frame. */
26325 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26326 delta -= 16;
26327 /* The link register is always the first saved register. */
26328 delta -= 4;
26330 /* Construct the address. */
26331 addr = gen_rtx_REG (SImode, reg);
26332 if (delta > limit)
26334 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26335 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26336 addr = scratch;
26338 else
26339 addr = plus_constant (Pmode, addr, delta);
26341 /* The store needs to be marked as frame related in order to prevent
26342 DSE from deleting it as dead if it is based on fp. */
26343 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26344 RTX_FRAME_RELATED_P (insn) = 1;
26345 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26347 else
26348 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26351 /* Implements target hook vector_mode_supported_p. */
26352 bool
26353 arm_vector_mode_supported_p (machine_mode mode)
26355 /* Neon also supports V2SImode, etc. listed in the clause below. */
26356 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26357 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26358 return true;
26360 if ((TARGET_NEON || TARGET_IWMMXT)
26361 && ((mode == V2SImode)
26362 || (mode == V4HImode)
26363 || (mode == V8QImode)))
26364 return true;
26366 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26367 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26368 || mode == V2HAmode))
26369 return true;
26371 return false;
26374 /* Implements target hook array_mode_supported_p. */
26376 static bool
26377 arm_array_mode_supported_p (machine_mode mode,
26378 unsigned HOST_WIDE_INT nelems)
26380 if (TARGET_NEON
26381 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26382 && (nelems >= 2 && nelems <= 4))
26383 return true;
26385 return false;
26388 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26389 registers when autovectorizing for Neon, at least until multiple vector
26390 widths are supported properly by the middle-end. */
26392 static machine_mode
26393 arm_preferred_simd_mode (machine_mode mode)
26395 if (TARGET_NEON)
26396 switch (mode)
26398 case SFmode:
26399 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26400 case SImode:
26401 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26402 case HImode:
26403 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26404 case QImode:
26405 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26406 case DImode:
26407 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26408 return V2DImode;
26409 break;
26411 default:;
26414 if (TARGET_REALLY_IWMMXT)
26415 switch (mode)
26417 case SImode:
26418 return V2SImode;
26419 case HImode:
26420 return V4HImode;
26421 case QImode:
26422 return V8QImode;
26424 default:;
26427 return word_mode;
26430 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26432 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26433 using r0-r4 for function arguments, r7 for the stack frame and don't have
26434 enough left over to do doubleword arithmetic. For Thumb-2 all the
26435 potentially problematic instructions accept high registers so this is not
26436 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26437 that require many low registers. */
26438 static bool
26439 arm_class_likely_spilled_p (reg_class_t rclass)
26441 if ((TARGET_THUMB1 && rclass == LO_REGS)
26442 || rclass == CC_REG)
26443 return true;
26445 return false;
26448 /* Implements target hook small_register_classes_for_mode_p. */
26449 bool
26450 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26452 return TARGET_THUMB1;
26455 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26456 ARM insns and therefore guarantee that the shift count is modulo 256.
26457 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26458 guarantee no particular behavior for out-of-range counts. */
26460 static unsigned HOST_WIDE_INT
26461 arm_shift_truncation_mask (machine_mode mode)
26463 return mode == SImode ? 255 : 0;
26467 /* Map internal gcc register numbers to DWARF2 register numbers. */
26469 unsigned int
26470 arm_dbx_register_number (unsigned int regno)
26472 if (regno < 16)
26473 return regno;
26475 if (IS_VFP_REGNUM (regno))
26477 /* See comment in arm_dwarf_register_span. */
26478 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26479 return 64 + regno - FIRST_VFP_REGNUM;
26480 else
26481 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26484 if (IS_IWMMXT_GR_REGNUM (regno))
26485 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26487 if (IS_IWMMXT_REGNUM (regno))
26488 return 112 + regno - FIRST_IWMMXT_REGNUM;
26490 gcc_unreachable ();
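/* Illustrative mappings from the function above: core register r5 -> 5,
   s0 -> 64, s31 -> 95, while d16-d31 (which have no single-precision
   aliases) land in the 256+ range, e.g. d16 -> 272.  */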
26493 /* DWARF models VFPv3 registers as 32 64-bit registers.
26494 GCC models them as 64 32-bit registers, so we need to describe this to
26495 the DWARF generation code. Other registers can use the default. */
26496 static rtx
26497 arm_dwarf_register_span (rtx rtl)
26499 machine_mode mode;
26500 unsigned regno;
26501 rtx parts[16];
26502 int nregs;
26503 int i;
26505 regno = REGNO (rtl);
26506 if (!IS_VFP_REGNUM (regno))
26507 return NULL_RTX;
26509 /* XXX FIXME: The EABI defines two VFP register ranges:
26510 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26511 256-287: D0-D31
26512 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26513 corresponding D register. Until GDB supports this, we shall use the
26514 legacy encodings. We also use these encodings for D0-D15 for
26515 compatibility with older debuggers. */
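  /* For example (illustrative), a DFmode value in d5 is described below
     as the pair of SImode registers s10 and s11, i.e. DWARF registers
     74 and 75 under the legacy numbering.  */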
26516 mode = GET_MODE (rtl);
26517 if (GET_MODE_SIZE (mode) < 8)
26518 return NULL_RTX;
26520 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26522 nregs = GET_MODE_SIZE (mode) / 4;
26523 for (i = 0; i < nregs; i += 2)
26524 if (TARGET_BIG_END)
26526 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26527 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26529 else
26531 parts[i] = gen_rtx_REG (SImode, regno + i);
26532 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26535 else
26537 nregs = GET_MODE_SIZE (mode) / 8;
26538 for (i = 0; i < nregs; i++)
26539 parts[i] = gen_rtx_REG (DImode, regno + i);
26542 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26545 #if ARM_UNWIND_INFO
26546 /* Emit unwind directives for a store-multiple instruction or stack pointer
26547 push during alignment.
26548 These should only ever be generated by the function prologue code, so
26549 expect them to have a particular form.
26550 The store-multiple instruction sometimes pushes pc as the last register,
26551 although it should not be tracked in the unwind information; for -Os it
26552 sometimes pushes some dummy registers before the first register that needs
26553 to be tracked in unwind information; such dummy registers are there just
26554 to avoid separate stack adjustment, and will not be restored in the
26555 epilogue. */
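/* For illustration: a prologue push of {r4, r5, lr} (a PARALLEL whose
   first SET drops sp by 12 and whose remaining SETs store r4, r5 and lr
   at consecutive offsets) is annotated with the single directive
	.save {r4, r5, lr}
   assuming no -Os padding registers are involved.  */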
26557 static void
26558 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26560 int i;
26561 HOST_WIDE_INT offset;
26562 HOST_WIDE_INT nregs;
26563 int reg_size;
26564 unsigned reg;
26565 unsigned lastreg;
26566 unsigned padfirst = 0, padlast = 0;
26567 rtx e;
26569 e = XVECEXP (p, 0, 0);
26570 gcc_assert (GET_CODE (e) == SET);
26572 /* First insn will adjust the stack pointer. */
26573 gcc_assert (GET_CODE (e) == SET
26574 && REG_P (SET_DEST (e))
26575 && REGNO (SET_DEST (e)) == SP_REGNUM
26576 && GET_CODE (SET_SRC (e)) == PLUS);
26578 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26579 nregs = XVECLEN (p, 0) - 1;
26580 gcc_assert (nregs);
26582 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26583 if (reg < 16)
26585 /* For -Os dummy registers can be pushed at the beginning to
26586 avoid separate stack pointer adjustment. */
26587 e = XVECEXP (p, 0, 1);
26588 e = XEXP (SET_DEST (e), 0);
26589 if (GET_CODE (e) == PLUS)
26590 padfirst = INTVAL (XEXP (e, 1));
26591 gcc_assert (padfirst == 0 || optimize_size);
26592 /* The function prologue may also push pc, but not annotate it as it is
26593 never restored. We turn this into a stack pointer adjustment. */
26594 e = XVECEXP (p, 0, nregs);
26595 e = XEXP (SET_DEST (e), 0);
26596 if (GET_CODE (e) == PLUS)
26597 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26598 else
26599 padlast = offset - 4;
26600 gcc_assert (padlast == 0 || padlast == 4);
26601 if (padlast == 4)
26602 fprintf (asm_out_file, "\t.pad #4\n");
26603 reg_size = 4;
26604 fprintf (asm_out_file, "\t.save {");
26606 else if (IS_VFP_REGNUM (reg))
26608 reg_size = 8;
26609 fprintf (asm_out_file, "\t.vsave {");
26611 else
26612 /* Unknown register type. */
26613 gcc_unreachable ();
26615 /* If the stack increment doesn't match the size of the saved registers,
26616 something has gone horribly wrong. */
26617 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26619 offset = padfirst;
26620 lastreg = 0;
26621 /* The remaining insns will describe the stores. */
26622 for (i = 1; i <= nregs; i++)
26624 /* Expect (set (mem <addr>) (reg)).
26625 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26626 e = XVECEXP (p, 0, i);
26627 gcc_assert (GET_CODE (e) == SET
26628 && MEM_P (SET_DEST (e))
26629 && REG_P (SET_SRC (e)));
26631 reg = REGNO (SET_SRC (e));
26632 gcc_assert (reg >= lastreg);
26634 if (i != 1)
26635 fprintf (asm_out_file, ", ");
26636 /* We can't use %r for vfp because we need to use the
26637 double precision register names. */
26638 if (IS_VFP_REGNUM (reg))
26639 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26640 else
26641 asm_fprintf (asm_out_file, "%r", reg);
26643 #ifdef ENABLE_CHECKING
26644 /* Check that the addresses are consecutive. */
26645 e = XEXP (SET_DEST (e), 0);
26646 if (GET_CODE (e) == PLUS)
26647 gcc_assert (REG_P (XEXP (e, 0))
26648 && REGNO (XEXP (e, 0)) == SP_REGNUM
26649 && CONST_INT_P (XEXP (e, 1))
26650 && offset == INTVAL (XEXP (e, 1)));
26651 else
26652 gcc_assert (i == 1
26653 && REG_P (e)
26654 && REGNO (e) == SP_REGNUM);
26655 offset += reg_size;
26656 #endif
26658 fprintf (asm_out_file, "}\n");
26659 if (padfirst)
26660 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26663 /* Emit unwind directives for a SET. */
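/* Typical translations performed below (illustrative):
     (set (mem (pre_dec sp)) r4)		->  .save {r4}
     (set sp (plus sp (const_int -8)))		->  .pad #8
     (set fp (plus sp (const_int 4)))		->  .setfp fp, sp, #4
     (set r4 sp)				->  .movsp r4
   The exact spelling of the frame pointer depends on ARM vs. Thumb.  */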
26665 static void
26666 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26668 rtx e0;
26669 rtx e1;
26670 unsigned reg;
26672 e0 = XEXP (p, 0);
26673 e1 = XEXP (p, 1);
26674 switch (GET_CODE (e0))
26676 case MEM:
26677 /* Pushing a single register. */
26678 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26679 || !REG_P (XEXP (XEXP (e0, 0), 0))
26680 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26681 abort ();
26683 asm_fprintf (asm_out_file, "\t.save ");
26684 if (IS_VFP_REGNUM (REGNO (e1)))
26685 asm_fprintf(asm_out_file, "{d%d}\n",
26686 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26687 else
26688 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26689 break;
26691 case REG:
26692 if (REGNO (e0) == SP_REGNUM)
26694 /* A stack increment. */
26695 if (GET_CODE (e1) != PLUS
26696 || !REG_P (XEXP (e1, 0))
26697 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26698 || !CONST_INT_P (XEXP (e1, 1)))
26699 abort ();
26701 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26702 -INTVAL (XEXP (e1, 1)));
26704 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26706 HOST_WIDE_INT offset;
26708 if (GET_CODE (e1) == PLUS)
26710 if (!REG_P (XEXP (e1, 0))
26711 || !CONST_INT_P (XEXP (e1, 1)))
26712 abort ();
26713 reg = REGNO (XEXP (e1, 0));
26714 offset = INTVAL (XEXP (e1, 1));
26715 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26716 HARD_FRAME_POINTER_REGNUM, reg,
26717 offset);
26719 else if (REG_P (e1))
26721 reg = REGNO (e1);
26722 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26723 HARD_FRAME_POINTER_REGNUM, reg);
26725 else
26726 abort ();
26728 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26730 /* Move from sp to reg. */
26731 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26733 else if (GET_CODE (e1) == PLUS
26734 && REG_P (XEXP (e1, 0))
26735 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26736 && CONST_INT_P (XEXP (e1, 1)))
26738 /* Set reg to offset from sp. */
26739 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26740 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26742 else
26743 abort ();
26744 break;
26746 default:
26747 abort ();
26752 /* Emit unwind directives for the given insn. */
26754 static void
26755 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26757 rtx note, pat;
26758 bool handled_one = false;
26760 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26761 return;
26763 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26764 && (TREE_NOTHROW (current_function_decl)
26765 || crtl->all_throwers_are_sibcalls))
26766 return;
26768 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26769 return;
26771 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26773 switch (REG_NOTE_KIND (note))
26775 case REG_FRAME_RELATED_EXPR:
26776 pat = XEXP (note, 0);
26777 goto found;
26779 case REG_CFA_REGISTER:
26780 pat = XEXP (note, 0);
26781 if (pat == NULL)
26783 pat = PATTERN (insn);
26784 if (GET_CODE (pat) == PARALLEL)
26785 pat = XVECEXP (pat, 0, 0);
26788 /* Only emitted for IS_STACKALIGN re-alignment. */
26790 rtx dest, src;
26791 unsigned reg;
26793 src = SET_SRC (pat);
26794 dest = SET_DEST (pat);
26796 gcc_assert (src == stack_pointer_rtx);
26797 reg = REGNO (dest);
26798 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26799 reg + 0x90, reg);
26801 handled_one = true;
26802 break;
26804 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26805 only to get correct DWARF information for shrink-wrapping. We should not
26806 emit unwind information for it because such notes are used either for
26807 pretend arguments or to adjust the sp and restore registers from the
26808 stack. */
26809 case REG_CFA_DEF_CFA:
26810 case REG_CFA_ADJUST_CFA:
26811 case REG_CFA_RESTORE:
26812 return;
26814 case REG_CFA_EXPRESSION:
26815 case REG_CFA_OFFSET:
26816 /* ??? Only handling here what we actually emit. */
26817 gcc_unreachable ();
26819 default:
26820 break;
26823 if (handled_one)
26824 return;
26825 pat = PATTERN (insn);
26826 found:
26828 switch (GET_CODE (pat))
26830 case SET:
26831 arm_unwind_emit_set (asm_out_file, pat);
26832 break;
26834 case SEQUENCE:
26835 /* Store multiple. */
26836 arm_unwind_emit_sequence (asm_out_file, pat);
26837 break;
26839 default:
26840 abort();
26845 /* Output a reference from a function exception table to the type_info
26846 object X. The EABI specifies that the symbol should be relocated by
26847 an R_ARM_TARGET2 relocation. */
26849 static bool
26850 arm_output_ttype (rtx x)
26852 fputs ("\t.word\t", asm_out_file);
26853 output_addr_const (asm_out_file, x);
26854 /* Use special relocations for symbol references. */
26855 if (!CONST_INT_P (x))
26856 fputs ("(TARGET2)", asm_out_file);
26857 fputc ('\n', asm_out_file);
26859 return TRUE;
26862 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26864 static void
26865 arm_asm_emit_except_personality (rtx personality)
26867 fputs ("\t.personality\t", asm_out_file);
26868 output_addr_const (asm_out_file, personality);
26869 fputc ('\n', asm_out_file);
26872 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26874 static void
26875 arm_asm_init_sections (void)
26877 exception_section = get_unnamed_section (0, output_section_asm_op,
26878 "\t.handlerdata");
26880 #endif /* ARM_UNWIND_INFO */
26882 /* Output unwind directives for the start/end of a function. */
26884 void
26885 arm_output_fn_unwind (FILE * f, bool prologue)
26887 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26888 return;
26890 if (prologue)
26891 fputs ("\t.fnstart\n", f);
26892 else
26894 /* If this function will never be unwound, then mark it as such.
26895 The same condition is used in arm_unwind_emit to suppress
26896 the frame annotations. */
26897 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26898 && (TREE_NOTHROW (current_function_decl)
26899 || crtl->all_throwers_are_sibcalls))
26900 fputs("\t.cantunwind\n", f);
26902 fputs ("\t.fnend\n", f);
26906 static bool
26907 arm_emit_tls_decoration (FILE *fp, rtx x)
26909 enum tls_reloc reloc;
26910 rtx val;
26912 val = XVECEXP (x, 0, 0);
26913 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26915 output_addr_const (fp, val);
26917 switch (reloc)
26919 case TLS_GD32:
26920 fputs ("(tlsgd)", fp);
26921 break;
26922 case TLS_LDM32:
26923 fputs ("(tlsldm)", fp);
26924 break;
26925 case TLS_LDO32:
26926 fputs ("(tlsldo)", fp);
26927 break;
26928 case TLS_IE32:
26929 fputs ("(gottpoff)", fp);
26930 break;
26931 case TLS_LE32:
26932 fputs ("(tpoff)", fp);
26933 break;
26934 case TLS_DESCSEQ:
26935 fputs ("(tlsdesc)", fp);
26936 break;
26937 default:
26938 gcc_unreachable ();
26941 switch (reloc)
26943 case TLS_GD32:
26944 case TLS_LDM32:
26945 case TLS_IE32:
26946 case TLS_DESCSEQ:
26947 fputs (" + (. - ", fp);
26948 output_addr_const (fp, XVECEXP (x, 0, 2));
26949 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26950 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26951 output_addr_const (fp, XVECEXP (x, 0, 3));
26952 fputc (')', fp);
26953 break;
26954 default:
26955 break;
26958 return TRUE;
26961 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26963 static void
26964 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26966 gcc_assert (size == 4);
26967 fputs ("\t.word\t", file);
26968 output_addr_const (file, x);
26969 fputs ("(tlsldo)", file);
26972 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26974 static bool
26975 arm_output_addr_const_extra (FILE *fp, rtx x)
26977 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26978 return arm_emit_tls_decoration (fp, x);
26979 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26981 char label[256];
26982 int labelno = INTVAL (XVECEXP (x, 0, 0));
26984 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26985 assemble_name_raw (fp, label);
26987 return TRUE;
26989 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26991 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26992 if (GOT_PCREL)
26993 fputs ("+.", fp);
26994 fputs ("-(", fp);
26995 output_addr_const (fp, XVECEXP (x, 0, 0));
26996 fputc (')', fp);
26997 return TRUE;
26999 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27001 output_addr_const (fp, XVECEXP (x, 0, 0));
27002 if (GOT_PCREL)
27003 fputs ("+.", fp);
27004 fputs ("-(", fp);
27005 output_addr_const (fp, XVECEXP (x, 0, 1));
27006 fputc (')', fp);
27007 return TRUE;
27009 else if (GET_CODE (x) == CONST_VECTOR)
27010 return arm_emit_vector_const (fp, x);
27012 return FALSE;
27015 /* Output assembly for a shift instruction.
27016 SET_FLAGS determines how the instruction modifies the condition codes.
27017 0 - Do not set condition codes.
27018 1 - Set condition codes.
27019 2 - Use smallest instruction. */
27020 const char *
27021 arm_output_shift(rtx * operands, int set_flags)
27023 char pattern[100];
27024 static const char flag_chars[3] = {'?', '.', '!'};
27025 const char *shift;
27026 HOST_WIDE_INT val;
27027 char c;
27029 c = flag_chars[set_flags];
27030 if (TARGET_UNIFIED_ASM)
27032 shift = shift_op(operands[3], &val);
27033 if (shift)
27035 if (val != -1)
27036 operands[2] = GEN_INT(val);
27037 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27039 else
27040 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27042 else
27043 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
27044 output_asm_insn (pattern, operands);
27045 return "";
27048 /* Output assembly for a WMMX immediate shift instruction. */
27049 const char *
27050 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27052 int shift = INTVAL (operands[2]);
27053 char templ[50];
27054 machine_mode opmode = GET_MODE (operands[0]);
27056 gcc_assert (shift >= 0);
27058 /* Handle shift values that are out of range: > 63 for the D qualifier,
27059 > 31 for the W qualifier or > 15 for the H qualifier. */
27060 if (((opmode == V4HImode) && (shift > 15))
27061 || ((opmode == V2SImode) && (shift > 31))
27062 || ((opmode == DImode) && (shift > 63)))
27064 if (wror_or_wsra)
27066 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27067 output_asm_insn (templ, operands);
27068 if (opmode == DImode)
27070 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27071 output_asm_insn (templ, operands);
27074 else
27076 /* The destination register will contain all zeros. */
27077 sprintf (templ, "wzero\t%%0");
27078 output_asm_insn (templ, operands);
27080 return "";
27083 if ((opmode == DImode) && (shift > 32))
27085 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27086 output_asm_insn (templ, operands);
27087 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27088 output_asm_insn (templ, operands);
27090 else
27092 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27093 output_asm_insn (templ, operands);
27095 return "";
27098 /* Output assembly for a WMMX tinsr instruction. */
27099 const char *
27100 arm_output_iwmmxt_tinsr (rtx *operands)
27102 int mask = INTVAL (operands[3]);
27103 int i;
27104 char templ[50];
27105 int units = mode_nunits[GET_MODE (operands[0])];
27106 gcc_assert ((mask & (mask - 1)) == 0);
27107 for (i = 0; i < units; ++i)
27109 if ((mask & 0x01) == 1)
27111 break;
27113 mask >>= 1;
27115 gcc_assert (i < units);
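  /* I is now the index of the single element selected by the one-hot
     writemask; e.g. (illustrative) a mask of 0x4 on a V4HImode
     destination makes the tinsrh emitted below use element index #2.  */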
27117 switch (GET_MODE (operands[0]))
27119 case V8QImode:
27120 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27121 break;
27122 case V4HImode:
27123 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27124 break;
27125 case V2SImode:
27126 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27127 break;
27128 default:
27129 gcc_unreachable ();
27130 break;
27132 output_asm_insn (templ, operands);
27134 return "";
27137 /* Output a Thumb-1 casesi dispatch sequence. */
27138 const char *
27139 thumb1_output_casesi (rtx *operands)
27141 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27143 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27145 switch (GET_MODE(diff_vec))
27147 case QImode:
27148 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27149 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27150 case HImode:
27151 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27152 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27153 case SImode:
27154 return "bl\t%___gnu_thumb1_case_si";
27155 default:
27156 gcc_unreachable ();
27160 /* Output a Thumb-2 casesi instruction. */
27161 const char *
27162 thumb2_output_casesi (rtx *operands)
27164 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27166 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27168 output_asm_insn ("cmp\t%0, %1", operands);
27169 output_asm_insn ("bhi\t%l3", operands);
27170 switch (GET_MODE(diff_vec))
27172 case QImode:
27173 return "tbb\t[%|pc, %0]";
27174 case HImode:
27175 return "tbh\t[%|pc, %0, lsl #1]";
27176 case SImode:
27177 if (flag_pic)
27179 output_asm_insn ("adr\t%4, %l2", operands);
27180 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27181 output_asm_insn ("add\t%4, %4, %5", operands);
27182 return "bx\t%4";
27184 else
27186 output_asm_insn ("adr\t%4, %l2", operands);
27187 return "ldr\t%|pc, [%4, %0, lsl #2]";
27189 default:
27190 gcc_unreachable ();
27194 /* Most ARM cores are single issue, but some newer ones can dual issue.
27195 The scheduler descriptions rely on this being correct. */
27196 static int
27197 arm_issue_rate (void)
27199 switch (arm_tune)
27201 case xgene1:
27202 return 4;
27204 case cortexa15:
27205 case cortexa57:
27206 case exynosm1:
27207 return 3;
27209 case cortexm7:
27210 case cortexr4:
27211 case cortexr4f:
27212 case cortexr5:
27213 case genericv7a:
27214 case cortexa5:
27215 case cortexa7:
27216 case cortexa8:
27217 case cortexa9:
27218 case cortexa12:
27219 case cortexa17:
27220 case cortexa53:
27221 case fa726te:
27222 case marvell_pj4:
27223 return 2;
27225 default:
27226 return 1;
27230 /* Return how many instructions the scheduler should look ahead to choose
27231 the best one. */
27232 static int
27233 arm_first_cycle_multipass_dfa_lookahead (void)
27235 int issue_rate = arm_issue_rate ();
27237 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27240 /* Enable modeling of L2 auto-prefetcher. */
27241 static int
27242 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27244 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27247 const char *
27248 arm_mangle_type (const_tree type)
27250 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27251 has to be mangled as if it is in the "std" namespace. */
27252 if (TARGET_AAPCS_BASED
27253 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27254 return "St9__va_list";
27256 /* Half-precision float. */
27257 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27258 return "Dh";
27260 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27261 builtin type. */
27262 if (TYPE_NAME (type) != NULL)
27263 return arm_mangle_builtin_type (type);
27265 /* Use the default mangling. */
27266 return NULL;
27269 /* Order of allocation of core registers for Thumb: this allocation is
27270 written over the corresponding initial entries of the array
27271 initialized with REG_ALLOC_ORDER. We allocate all low registers
27272 first. Saving and restoring a low register is usually cheaper than
27273 using a call-clobbered high register. */
27275 static const int thumb_core_reg_alloc_order[] =
27277 3, 2, 1, 0, 4, 5, 6, 7,
27278 14, 12, 8, 9, 10, 11
27281 /* Adjust register allocation order when compiling for Thumb. */
27283 void
27284 arm_order_regs_for_local_alloc (void)
27286 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27287 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27288 if (TARGET_THUMB)
27289 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27290 sizeof (thumb_core_reg_alloc_order));
27293 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27295 bool
27296 arm_frame_pointer_required (void)
27298 return (cfun->has_nonlocal_label
27299 || SUBTARGET_FRAME_POINTER_REQUIRED
27300 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27303 /* Only Thumb-1 lacks conditional execution, so return true if
27304 the target is not Thumb-1. */
27305 static bool
27306 arm_have_conditional_execution (void)
27308 return !TARGET_THUMB1;
27311 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27312 static HOST_WIDE_INT
27313 arm_vector_alignment (const_tree type)
27315 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27317 if (TARGET_AAPCS_BASED)
27318 align = MIN (align, 64);
27320 return align;
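/* For example, a 128-bit NEON vector type that would naturally be 16-byte
   aligned has its alignment capped at 64 bits (8 bytes) when compiling for
   an AAPCS-based target.  */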
27323 static unsigned int
27324 arm_autovectorize_vector_sizes (void)
27326 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27329 static bool
27330 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27332 /* Vectors which aren't in packed structures will not be less aligned than
27333 the natural alignment of their element type, so this is safe. */
27334 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27335 return !is_packed;
27337 return default_builtin_vector_alignment_reachable (type, is_packed);
27340 static bool
27341 arm_builtin_support_vector_misalignment (machine_mode mode,
27342 const_tree type, int misalignment,
27343 bool is_packed)
27345 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27347 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27349 if (is_packed)
27350 return align == 1;
27352 /* If the misalignment is unknown, we should be able to handle the access
27353 so long as it is not to a member of a packed data structure. */
27354 if (misalignment == -1)
27355 return true;
27357 /* Return true if the misalignment is a multiple of the natural alignment
27358 of the vector's element type. This is probably always going to be
27359 true in practice, since we've already established that this isn't a
27360 packed access. */
27361 return ((misalignment % align) == 0);
27364 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27365 is_packed);
27368 static void
27369 arm_conditional_register_usage (void)
27371 int regno;
27373 if (TARGET_THUMB1 && optimize_size)
27375 /* When optimizing for size on Thumb-1, it's better not
27376 to use the HI regs, because of the overhead of
27377 stacking them. */
27378 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27379 fixed_regs[regno] = call_used_regs[regno] = 1;
27382 /* The link register can be clobbered by any branch insn,
27383 but we have no way to track that at present, so mark
27384 it as unavailable. */
27385 if (TARGET_THUMB1)
27386 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27388 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27390 /* VFPv3 registers are disabled when earlier VFP
27391 versions are selected due to the definition of
27392 LAST_VFP_REGNUM. */
27393 for (regno = FIRST_VFP_REGNUM;
27394 regno <= LAST_VFP_REGNUM; ++ regno)
27396 fixed_regs[regno] = 0;
27397 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27398 || regno >= FIRST_VFP_REGNUM + 32;
27402 if (TARGET_REALLY_IWMMXT)
27404 regno = FIRST_IWMMXT_GR_REGNUM;
27405 /* The 2002/10/09 revision of the XScale ABI has wCG0
27406 and wCG1 as call-preserved registers. The 2002/11/21
27407 revision changed this so that all wCG registers are
27408 scratch registers. */
27409 for (regno = FIRST_IWMMXT_GR_REGNUM;
27410 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27411 fixed_regs[regno] = 0;
27412 /* The XScale ABI has wR0 - wR9 as scratch registers,
27413 the rest as call-preserved registers. */
27414 for (regno = FIRST_IWMMXT_REGNUM;
27415 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27417 fixed_regs[regno] = 0;
27418 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27424 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27425 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27427 else if (TARGET_APCS_STACK)
27429 fixed_regs[10] = 1;
27430 call_used_regs[10] = 1;
27432 /* -mcaller-super-interworking reserves r11 for calls to
27433 _interwork_r11_call_via_rN(). Making the register global
27434 is an easy way of ensuring that it remains valid for all
27435 calls. */
27436 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27437 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27439 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27440 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27441 if (TARGET_CALLER_INTERWORKING)
27442 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27447 static reg_class_t
27448 arm_preferred_rename_class (reg_class_t rclass)
27450 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27451 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
27452 which can reduce code size. */
27453 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27454 return LO_REGS;
27455 else
27456 return NO_REGS;
27459 /* Compute the attribute "length" of insn "*push_multi".
27460 So this function MUST be kept in sync with that insn pattern. */
27462 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27464 int i, regno, hi_reg;
27465 int num_saves = XVECLEN (parallel_op, 0);
27467 /* ARM mode. */
27468 if (TARGET_ARM)
27469 return 4;
27470 /* Thumb1 mode. */
27471 if (TARGET_THUMB1)
27472 return 2;
27474 /* Thumb2 mode. */
27475 regno = REGNO (first_op);
27476 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27477 for (i = 1; i < num_saves && !hi_reg; i++)
27479 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27480 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27483 if (!hi_reg)
27484 return 2;
27485 return 4;
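/* Roughly: ARM mode pushes are always 4 bytes and Thumb-1 pushes 2 bytes;
   a Thumb-2 PUSH whose register list contains only low registers and/or LR
   can use the 16-bit encoding (length 2), while any other high register in
   the list forces the 32-bit encoding (length 4).  */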
27488 /* Compute the number of instructions emitted by output_move_double. */
27490 arm_count_output_move_double_insns (rtx *operands)
27492 int count;
27493 rtx ops[2];
27494 /* output_move_double may modify the operands array, so call it
27495 here on a copy of the array. */
27496 ops[0] = operands[0];
27497 ops[1] = operands[1];
27498 output_move_double (ops, false, &count);
27499 return count;
27503 vfp3_const_double_for_fract_bits (rtx operand)
27505 REAL_VALUE_TYPE r0;
27507 if (!CONST_DOUBLE_P (operand))
27508 return 0;
27510 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27511 if (exact_real_inverse (DFmode, &r0))
27513 if (exact_real_truncate (DFmode, &r0))
27515 HOST_WIDE_INT value = real_to_integer (&r0);
27516 value = value & 0xffffffff;
27517 if ((value != 0) && ( (value & (value - 1)) == 0))
27518 return int_log2 (value);
27521 return 0;
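/* Worked example: for the constant 0.125 the exact inverse is 8.0, which
   is a power of two, so the function returns int_log2 (8) == 3, i.e. the
   number of fractional bits used by the fixed-point VCVT patterns.  */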
27525 vfp3_const_double_for_bits (rtx operand)
27527 REAL_VALUE_TYPE r0;
27529 if (!CONST_DOUBLE_P (operand))
27530 return 0;
27532 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27533 if (exact_real_truncate (DFmode, &r0))
27535 HOST_WIDE_INT value = real_to_integer (&r0);
27536 value = value & 0xffffffff;
27537 if ((value != 0) && ( (value & (value - 1)) == 0))
27538 return int_log2 (value);
27541 return 0;
27544 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27546 static void
27547 arm_pre_atomic_barrier (enum memmodel model)
27549 if (need_atomic_barrier_p (model, true))
27550 emit_insn (gen_memory_barrier ());
27553 static void
27554 arm_post_atomic_barrier (enum memmodel model)
27556 if (need_atomic_barrier_p (model, false))
27557 emit_insn (gen_memory_barrier ());
27560 /* Emit the load-exclusive and store-exclusive instructions.
27561 Use acquire and release versions if necessary. */
27563 static void
27564 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27566 rtx (*gen) (rtx, rtx);
27568 if (acq)
27570 switch (mode)
27572 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27573 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27574 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27575 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27576 default:
27577 gcc_unreachable ();
27580 else
27582 switch (mode)
27584 case QImode: gen = gen_arm_load_exclusiveqi; break;
27585 case HImode: gen = gen_arm_load_exclusivehi; break;
27586 case SImode: gen = gen_arm_load_exclusivesi; break;
27587 case DImode: gen = gen_arm_load_exclusivedi; break;
27588 default:
27589 gcc_unreachable ();
27593 emit_insn (gen (rval, mem));
27596 static void
27597 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27598 rtx mem, bool rel)
27600 rtx (*gen) (rtx, rtx, rtx);
27602 if (rel)
27604 switch (mode)
27606 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27607 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27608 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27609 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27610 default:
27611 gcc_unreachable ();
27614 else
27616 switch (mode)
27618 case QImode: gen = gen_arm_store_exclusiveqi; break;
27619 case HImode: gen = gen_arm_store_exclusivehi; break;
27620 case SImode: gen = gen_arm_store_exclusivesi; break;
27621 case DImode: gen = gen_arm_store_exclusivedi; break;
27622 default:
27623 gcc_unreachable ();
27627 emit_insn (gen (bval, rval, mem));
27630 /* Mark the previous jump instruction as unlikely. */
27632 static void
27633 emit_unlikely_jump (rtx insn)
27635 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27637 insn = emit_jump_insn (insn);
27638 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27641 /* Expand a compare and swap pattern. */
27643 void
27644 arm_expand_compare_and_swap (rtx operands[])
27646 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27647 machine_mode mode;
27648 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27650 bval = operands[0];
27651 rval = operands[1];
27652 mem = operands[2];
27653 oldval = operands[3];
27654 newval = operands[4];
27655 is_weak = operands[5];
27656 mod_s = operands[6];
27657 mod_f = operands[7];
27658 mode = GET_MODE (mem);
27660 /* Normally the succ memory model must be stronger than fail, but in the
27661 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27662 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27664 if (TARGET_HAVE_LDACQ
27665 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27666 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27667 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27669 switch (mode)
27671 case QImode:
27672 case HImode:
27673 /* For narrow modes, we're going to perform the comparison in SImode,
27674 so do the zero-extension now. */
27675 rval = gen_reg_rtx (SImode);
27676 oldval = convert_modes (SImode, mode, oldval, true);
27677 /* FALLTHRU */
27679 case SImode:
27680 /* Force the value into a register if needed. We waited until after
27681 the zero-extension above to do this properly. */
27682 if (!arm_add_operand (oldval, SImode))
27683 oldval = force_reg (SImode, oldval);
27684 break;
27686 case DImode:
27687 if (!cmpdi_operand (oldval, mode))
27688 oldval = force_reg (mode, oldval);
27689 break;
27691 default:
27692 gcc_unreachable ();
27695 switch (mode)
27697 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27698 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27699 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27700 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27701 default:
27702 gcc_unreachable ();
27705 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27707 if (mode == QImode || mode == HImode)
27708 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27710 /* In all cases, we arrange for success to be signaled by Z set.
27711 This arrangement allows for the boolean result to be used directly
27712 in a subsequent branch, post optimization. */
27713 x = gen_rtx_REG (CCmode, CC_REGNUM);
27714 x = gen_rtx_EQ (SImode, x, const0_rtx);
27715 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27718 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27719 another memory store between the load-exclusive and store-exclusive can
27720 reset the monitor from Exclusive to Open state. This means we must wait
27721 until after reload to split the pattern, lest we get a register spill in
27722 the middle of the atomic sequence. */
27724 void
27725 arm_split_compare_and_swap (rtx operands[])
27727 rtx rval, mem, oldval, newval, scratch;
27728 machine_mode mode;
27729 enum memmodel mod_s, mod_f;
27730 bool is_weak;
27731 rtx_code_label *label1, *label2;
27732 rtx x, cond;
27734 rval = operands[0];
27735 mem = operands[1];
27736 oldval = operands[2];
27737 newval = operands[3];
27738 is_weak = (operands[4] != const0_rtx);
27739 mod_s = (enum memmodel) INTVAL (operands[5]);
27740 mod_f = (enum memmodel) INTVAL (operands[6]);
27741 scratch = operands[7];
27742 mode = GET_MODE (mem);
27744 bool use_acquire = TARGET_HAVE_LDACQ
27745 && !(mod_s == MEMMODEL_RELAXED
27746 || mod_s == MEMMODEL_CONSUME
27747 || mod_s == MEMMODEL_RELEASE);
27749 bool use_release = TARGET_HAVE_LDACQ
27750 && !(mod_s == MEMMODEL_RELAXED
27751 || mod_s == MEMMODEL_CONSUME
27752 || mod_s == MEMMODEL_ACQUIRE);
27754 /* Checks whether a barrier is needed and emits one accordingly. */
27755 if (!(use_acquire || use_release))
27756 arm_pre_atomic_barrier (mod_s);
27758 label1 = NULL;
27759 if (!is_weak)
27761 label1 = gen_label_rtx ();
27762 emit_label (label1);
27764 label2 = gen_label_rtx ();
27766 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27768 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27769 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27770 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27771 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27772 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27774 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27776 /* Weak or strong, we want EQ to be true for success, so that we
27777 match the flags that we got from the compare above. */
27778 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27779 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27780 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27782 if (!is_weak)
27784 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27785 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27786 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27787 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27790 if (mod_f != MEMMODEL_RELAXED)
27791 emit_label (label2);
27793 /* Checks whether a barrier is needed and emits one accordingly. */
27794 if (!(use_acquire || use_release))
27795 arm_post_atomic_barrier (mod_s);
27797 if (mod_f == MEMMODEL_RELAXED)
27798 emit_label (label2);
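/* The emitted sequence is roughly (strong variant, relaxed memory model):

     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Lfail
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     .Lretry
     .Lfail:

   The retry branch is omitted for the weak variant, and DMB barriers or
   LDAEX/STLEX forms are used as required by the memory model.  */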
27801 void
27802 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27803 rtx value, rtx model_rtx, rtx cond)
27805 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27806 machine_mode mode = GET_MODE (mem);
27807 machine_mode wmode = (mode == DImode ? DImode : SImode);
27808 rtx_code_label *label;
27809 rtx x;
27811 bool use_acquire = TARGET_HAVE_LDACQ
27812 && !(model == MEMMODEL_RELAXED
27813 || model == MEMMODEL_CONSUME
27814 || model == MEMMODEL_RELEASE);
27816 bool use_release = TARGET_HAVE_LDACQ
27817 && !(model == MEMMODEL_RELAXED
27818 || model == MEMMODEL_CONSUME
27819 || model == MEMMODEL_ACQUIRE);
27821 /* Checks whether a barrier is needed and emits one accordingly. */
27822 if (!(use_acquire || use_release))
27823 arm_pre_atomic_barrier (model);
27825 label = gen_label_rtx ();
27826 emit_label (label);
27828 if (new_out)
27829 new_out = gen_lowpart (wmode, new_out);
27830 if (old_out)
27831 old_out = gen_lowpart (wmode, old_out);
27832 else
27833 old_out = new_out;
27834 value = simplify_gen_subreg (wmode, value, mode, 0);
27836 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27838 switch (code)
27840 case SET:
27841 new_out = value;
27842 break;
27844 case NOT:
27845 x = gen_rtx_AND (wmode, old_out, value);
27846 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27847 x = gen_rtx_NOT (wmode, new_out);
27848 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27849 break;
27851 case MINUS:
27852 if (CONST_INT_P (value))
27854 value = GEN_INT (-INTVAL (value));
27855 code = PLUS;
27857 /* FALLTHRU */
27859 case PLUS:
27860 if (mode == DImode)
27862 /* DImode plus/minus need to clobber flags. */
27863 /* The adddi3 and subdi3 patterns are incorrectly written so that
27864 they require matching operands, even when we could easily support
27865 three operands. Thankfully, this can be fixed up post-splitting,
27866 as the individual add+adc patterns do accept three operands and
27867 post-reload cprop can make these moves go away. */
27868 emit_move_insn (new_out, old_out);
27869 if (code == PLUS)
27870 x = gen_adddi3 (new_out, new_out, value);
27871 else
27872 x = gen_subdi3 (new_out, new_out, value);
27873 emit_insn (x);
27874 break;
27876 /* FALLTHRU */
27878 default:
27879 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27880 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27881 break;
27884 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27885 use_release);
27887 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27888 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27890 /* Checks whether a barrier is needed and emits one accordingly. */
27891 if (!(use_acquire || use_release))
27892 arm_post_atomic_barrier (model);
27895 #define MAX_VECT_LEN 16
27897 struct expand_vec_perm_d
27899 rtx target, op0, op1;
27900 unsigned char perm[MAX_VECT_LEN];
27901 machine_mode vmode;
27902 unsigned char nelt;
27903 bool one_vector_p;
27904 bool testing_p;
27907 /* Generate a variable permutation. */
27909 static void
27910 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27912 machine_mode vmode = GET_MODE (target);
27913 bool one_vector_p = rtx_equal_p (op0, op1);
27915 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27916 gcc_checking_assert (GET_MODE (op0) == vmode);
27917 gcc_checking_assert (GET_MODE (op1) == vmode);
27918 gcc_checking_assert (GET_MODE (sel) == vmode);
27919 gcc_checking_assert (TARGET_NEON);
27921 if (one_vector_p)
27923 if (vmode == V8QImode)
27924 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27925 else
27926 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27928 else
27930 rtx pair;
27932 if (vmode == V8QImode)
27934 pair = gen_reg_rtx (V16QImode);
27935 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27936 pair = gen_lowpart (TImode, pair);
27937 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27939 else
27941 pair = gen_reg_rtx (OImode);
27942 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27943 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27948 void
27949 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27951 machine_mode vmode = GET_MODE (target);
27952 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27953 bool one_vector_p = rtx_equal_p (op0, op1);
27954 rtx rmask[MAX_VECT_LEN], mask;
27956 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27957 numbering of elements for big-endian, we must reverse the order. */
27958 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27960 /* The VTBL instruction does not use a modulo index, so we must take care
27961 of that ourselves. */
27962 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27963 for (i = 0; i < nelt; ++i)
27964 rmask[i] = mask;
27965 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27966 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27968 arm_expand_vec_perm_1 (target, op0, op1, sel);
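/* The AND above supplies the modulo-nelt semantics that VEC_PERM_EXPR
   requires: VTBL itself returns 0 for out-of-range indexes rather than
   wrapping, so the selector is masked to nelt-1 (or 2*nelt-1 for a
   two-vector permute) before the table lookup.  */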
27971 /* Generate or test for an insn that supports a constant permutation. */
27973 /* Recognize patterns for the VUZP insns. */
27975 static bool
27976 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27978 unsigned int i, odd, mask, nelt = d->nelt;
27979 rtx out0, out1, in0, in1, x;
27980 rtx (*gen)(rtx, rtx, rtx, rtx);
27982 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27983 return false;
27985 /* Note that these are little-endian tests. Adjust for big-endian later. */
27986 if (d->perm[0] == 0)
27987 odd = 0;
27988 else if (d->perm[0] == 1)
27989 odd = 1;
27990 else
27991 return false;
27992 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27994 for (i = 0; i < nelt; i++)
27996 unsigned elt = (i * 2 + odd) & mask;
27997 if (d->perm[i] != elt)
27998 return false;
28001 /* Success! */
28002 if (d->testing_p)
28003 return true;
28005 switch (d->vmode)
28007 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28008 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28009 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28010 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28011 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28012 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28013 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28014 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28015 default:
28016 gcc_unreachable ();
28019 in0 = d->op0;
28020 in1 = d->op1;
28021 if (BYTES_BIG_ENDIAN)
28023 x = in0, in0 = in1, in1 = x;
28024 odd = !odd;
28027 out0 = d->target;
28028 out1 = gen_reg_rtx (d->vmode);
28029 if (odd)
28030 x = out0, out0 = out1, out1 = x;
28032 emit_insn (gen (out0, in0, in1, out1));
28033 return true;
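/* Example: for V4SImode with selector {0, 2, 4, 6} (odd == 0) this emits a
   VUZP of op0/op1, with the even-indexed elements landing in d->target and
   the odd-indexed ones in the scratch output register.  */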
28036 /* Recognize patterns for the VZIP insns. */
28038 static bool
28039 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28041 unsigned int i, high, mask, nelt = d->nelt;
28042 rtx out0, out1, in0, in1, x;
28043 rtx (*gen)(rtx, rtx, rtx, rtx);
28045 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28046 return false;
28048 /* Note that these are little-endian tests. Adjust for big-endian later. */
28049 high = nelt / 2;
28050 if (d->perm[0] == high)
28052 else if (d->perm[0] == 0)
28053 high = 0;
28054 else
28055 return false;
28056 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28058 for (i = 0; i < nelt / 2; i++)
28060 unsigned elt = (i + high) & mask;
28061 if (d->perm[i * 2] != elt)
28062 return false;
28063 elt = (elt + nelt) & mask;
28064 if (d->perm[i * 2 + 1] != elt)
28065 return false;
28068 /* Success! */
28069 if (d->testing_p)
28070 return true;
28072 switch (d->vmode)
28074 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28075 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28076 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28077 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28078 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28079 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28080 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28081 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28082 default:
28083 gcc_unreachable ();
28086 in0 = d->op0;
28087 in1 = d->op1;
28088 if (BYTES_BIG_ENDIAN)
28090 x = in0, in0 = in1, in1 = x;
28091 high = !high;
28094 out0 = d->target;
28095 out1 = gen_reg_rtx (d->vmode);
28096 if (high)
28097 x = out0, out0 = out1, out1 = x;
28099 emit_insn (gen (out0, in0, in1, out1));
28100 return true;
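/* Example: for V4SImode with selector {0, 4, 1, 5} (high == 0) this emits a
   VZIP interleaving the low halves of op0 and op1 into d->target.  */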
28103 /* Recognize patterns for the VREV insns. */
28105 static bool
28106 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28108 unsigned int i, j, diff, nelt = d->nelt;
28109 rtx (*gen)(rtx, rtx);
28111 if (!d->one_vector_p)
28112 return false;
28114 diff = d->perm[0];
28115 switch (diff)
28117 case 7:
28118 switch (d->vmode)
28120 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28121 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28122 default:
28123 return false;
28125 break;
28126 case 3:
28127 switch (d->vmode)
28129 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28130 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28131 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28132 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28133 default:
28134 return false;
28136 break;
28137 case 1:
28138 switch (d->vmode)
28140 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28141 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28142 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28143 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28144 case V4SImode: gen = gen_neon_vrev64v4si; break;
28145 case V2SImode: gen = gen_neon_vrev64v2si; break;
28146 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28147 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28148 default:
28149 return false;
28151 break;
28152 default:
28153 return false;
28156 for (i = 0; i < nelt ; i += diff + 1)
28157 for (j = 0; j <= diff; j += 1)
28159 /* This is guaranteed to be true as the value of diff
28160 is 7, 3, 1 and we should have enough elements in the
28161 queue to generate this. Getting a vector mask with a
28162 value of diff other than these values implies that
28163 something is wrong by the time we get here. */
28164 gcc_assert (i + j < nelt);
28165 if (d->perm[i + j] != i + diff - j)
28166 return false;
28169 /* Success! */
28170 if (d->testing_p)
28171 return true;
28173 emit_insn (gen (d->target, d->op0));
28174 return true;
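/* Example: a V8HImode selector of {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1
   and is matched as VREV32.16, reversing the halfwords within each 32-bit
   group.  */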
28177 /* Recognize patterns for the VTRN insns. */
28179 static bool
28180 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28182 unsigned int i, odd, mask, nelt = d->nelt;
28183 rtx out0, out1, in0, in1, x;
28184 rtx (*gen)(rtx, rtx, rtx, rtx);
28186 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28187 return false;
28189 /* Note that these are little-endian tests. Adjust for big-endian later. */
28190 if (d->perm[0] == 0)
28191 odd = 0;
28192 else if (d->perm[0] == 1)
28193 odd = 1;
28194 else
28195 return false;
28196 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28198 for (i = 0; i < nelt; i += 2)
28200 if (d->perm[i] != i + odd)
28201 return false;
28202 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28203 return false;
28206 /* Success! */
28207 if (d->testing_p)
28208 return true;
28210 switch (d->vmode)
28212 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28213 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28214 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28215 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28216 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28217 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28218 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28219 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28220 default:
28221 gcc_unreachable ();
28224 in0 = d->op0;
28225 in1 = d->op1;
28226 if (BYTES_BIG_ENDIAN)
28228 x = in0, in0 = in1, in1 = x;
28229 odd = !odd;
28232 out0 = d->target;
28233 out1 = gen_reg_rtx (d->vmode);
28234 if (odd)
28235 x = out0, out0 = out1, out1 = x;
28237 emit_insn (gen (out0, in0, in1, out1));
28238 return true;
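/* Example: for V4SImode a selector of {0, 4, 2, 6} (odd == 0) matches VTRN,
   which transposes pairs of elements between the two input vectors.  */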
28241 /* Recognize patterns for the VEXT insns. */
28243 static bool
28244 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28246 unsigned int i, nelt = d->nelt;
28247 rtx (*gen) (rtx, rtx, rtx, rtx);
28248 rtx offset;
28250 unsigned int location;
28252 unsigned int next = d->perm[0] + 1;
28254 /* TODO: Handle GCC's numbering of elements for big-endian. */
28255 if (BYTES_BIG_ENDIAN)
28256 return false;
28258 /* Check if the extracted indexes are increasing by one. */
28259 for (i = 1; i < nelt; next++, i++)
28261 /* If we hit the most significant element of the 2nd vector in
28262 the previous iteration, no need to test further. */
28263 if (next == 2 * nelt)
28264 return false;
28266 /* If we are operating on only one vector: it could be a
28267 rotation. If there are only two elements of size < 64, let
28268 arm_evpc_neon_vrev catch it. */
28269 if (d->one_vector_p && (next == nelt))
28271 if ((nelt == 2) && (d->vmode != V2DImode))
28272 return false;
28273 else
28274 next = 0;
28277 if (d->perm[i] != next)
28278 return false;
28281 location = d->perm[0];
28283 switch (d->vmode)
28285 case V16QImode: gen = gen_neon_vextv16qi; break;
28286 case V8QImode: gen = gen_neon_vextv8qi; break;
28287 case V4HImode: gen = gen_neon_vextv4hi; break;
28288 case V8HImode: gen = gen_neon_vextv8hi; break;
28289 case V2SImode: gen = gen_neon_vextv2si; break;
28290 case V4SImode: gen = gen_neon_vextv4si; break;
28291 case V2SFmode: gen = gen_neon_vextv2sf; break;
28292 case V4SFmode: gen = gen_neon_vextv4sf; break;
28293 case V2DImode: gen = gen_neon_vextv2di; break;
28294 default:
28295 return false;
28298 /* Success! */
28299 if (d->testing_p)
28300 return true;
28302 offset = GEN_INT (location);
28303 emit_insn (gen (d->target, d->op0, d->op1, offset));
28304 return true;
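/* Example: for V4SImode a selector of {1, 2, 3, 4} is a consecutive run
   starting at index 1, so this emits VEXT with an offset of 1, extracting a
   window from the concatenation of op0 and op1.  */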
28307 /* The NEON VTBL instruction is a fully variable permutation that's even
28308 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28309 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28310 can do slightly better by expanding this as a constant where we don't
28311 have to apply a mask. */
28313 static bool
28314 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28316 rtx rperm[MAX_VECT_LEN], sel;
28317 machine_mode vmode = d->vmode;
28318 unsigned int i, nelt = d->nelt;
28320 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28321 numbering of elements for big-endian, we must reverse the order. */
28322 if (BYTES_BIG_ENDIAN)
28323 return false;
28325 if (d->testing_p)
28326 return true;
28328 /* Generic code will try constant permutation twice. Once with the
28329 original mode and again with the elements lowered to QImode.
28330 So wait and don't do the selector expansion ourselves. */
28331 if (vmode != V8QImode && vmode != V16QImode)
28332 return false;
28334 for (i = 0; i < nelt; ++i)
28335 rperm[i] = GEN_INT (d->perm[i]);
28336 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28337 sel = force_reg (vmode, sel);
28339 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28340 return true;
28343 static bool
28344 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28346 /* Check if the input mask matches vext before reordering the
28347 operands. */
28348 if (TARGET_NEON)
28349 if (arm_evpc_neon_vext (d))
28350 return true;
28352 /* The pattern matching functions above are written to look for a small
28353 number to begin the sequence (0, 1, N/2). If we begin with an index
28354 from the second operand, we can swap the operands. */
28355 if (d->perm[0] >= d->nelt)
28357 unsigned i, nelt = d->nelt;
28358 rtx x;
28360 for (i = 0; i < nelt; ++i)
28361 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28363 x = d->op0;
28364 d->op0 = d->op1;
28365 d->op1 = x;
28368 if (TARGET_NEON)
28370 if (arm_evpc_neon_vuzp (d))
28371 return true;
28372 if (arm_evpc_neon_vzip (d))
28373 return true;
28374 if (arm_evpc_neon_vrev (d))
28375 return true;
28376 if (arm_evpc_neon_vtrn (d))
28377 return true;
28378 return arm_evpc_neon_vtbl (d);
28380 return false;
28383 /* Expand a vec_perm_const pattern. */
28385 bool
28386 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28388 struct expand_vec_perm_d d;
28389 int i, nelt, which;
28391 d.target = target;
28392 d.op0 = op0;
28393 d.op1 = op1;
28395 d.vmode = GET_MODE (target);
28396 gcc_assert (VECTOR_MODE_P (d.vmode));
28397 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28398 d.testing_p = false;
28400 for (i = which = 0; i < nelt; ++i)
28402 rtx e = XVECEXP (sel, 0, i);
28403 int ei = INTVAL (e) & (2 * nelt - 1);
28404 which |= (ei < nelt ? 1 : 2);
28405 d.perm[i] = ei;
28408 switch (which)
28410 default:
28411 gcc_unreachable();
28413 case 3:
28414 d.one_vector_p = false;
28415 if (!rtx_equal_p (op0, op1))
28416 break;
28418 /* The elements of PERM do not suggest that only the first operand
28419 is used, but both operands are identical. Allow easier matching
28420 of the permutation by folding the permutation into the single
28421 input vector. */
28422 /* FALLTHRU */
28423 case 2:
28424 for (i = 0; i < nelt; ++i)
28425 d.perm[i] &= nelt - 1;
28426 d.op0 = op1;
28427 d.one_vector_p = true;
28428 break;
28430 case 1:
28431 d.op1 = op0;
28432 d.one_vector_p = true;
28433 break;
28436 return arm_expand_vec_perm_const_1 (&d);
28439 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28441 static bool
28442 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28443 const unsigned char *sel)
28445 struct expand_vec_perm_d d;
28446 unsigned int i, nelt, which;
28447 bool ret;
28449 d.vmode = vmode;
28450 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28451 d.testing_p = true;
28452 memcpy (d.perm, sel, nelt);
28454 /* Categorize the set of elements in the selector. */
28455 for (i = which = 0; i < nelt; ++i)
28457 unsigned char e = d.perm[i];
28458 gcc_assert (e < 2 * nelt);
28459 which |= (e < nelt ? 1 : 2);
28462 /* For all elements from second vector, fold the elements to first. */
28463 if (which == 2)
28464 for (i = 0; i < nelt; ++i)
28465 d.perm[i] -= nelt;
28467 /* Check whether the mask can be applied to the vector type. */
28468 d.one_vector_p = (which != 3);
28470 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28471 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28472 if (!d.one_vector_p)
28473 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28475 start_sequence ();
28476 ret = arm_expand_vec_perm_const_1 (&d);
28477 end_sequence ();
28479 return ret;
28482 bool
28483 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28485 /* If we are soft float, and we either have ldrd or the mode is no
28486 wider than a word, then all auto-increment forms are ok. */
28487 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28488 return true;
28490 switch (code)
28492 /* Post increment and Pre Decrement are supported for all
28493 instruction forms except for vector forms. */
28494 case ARM_POST_INC:
28495 case ARM_PRE_DEC:
28496 if (VECTOR_MODE_P (mode))
28498 if (code != ARM_PRE_DEC)
28499 return true;
28500 else
28501 return false;
28504 return true;
28506 case ARM_POST_DEC:
28507 case ARM_PRE_INC:
28508 /* Without LDRD and mode size greater than
28509 word size, there is no point in auto-incrementing
28510 because ldm and stm will not have these forms. */
28511 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28512 return false;
28514 /* Vector and floating point modes do not support
28515 these auto increment forms. */
28516 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28517 return false;
28519 return true;
28521 default:
28522 return false;
28526 return false;
28529 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28530 on ARM, since we know that shifts by negative amounts are no-ops.
28531 Additionally, the default expansion code is not available or suitable
28532 for post-reload insn splits (this can occur when the register allocator
28533 chooses not to do a shift in NEON).
28535 This function is used in both initial expand and post-reload splits, and
28536 handles all kinds of 64-bit shifts.
28538 Input requirements:
28539 - It is safe for the input and output to be the same register, but
28540 early-clobber rules apply for the shift amount and scratch registers.
28541 - Shift by register requires both scratch registers. In all other cases
28542 the scratch registers may be NULL.
28543 - Ashiftrt by a register also clobbers the CC register. */
28544 void
28545 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28546 rtx amount, rtx scratch1, rtx scratch2)
28548 rtx out_high = gen_highpart (SImode, out);
28549 rtx out_low = gen_lowpart (SImode, out);
28550 rtx in_high = gen_highpart (SImode, in);
28551 rtx in_low = gen_lowpart (SImode, in);
28553 /* Terminology:
28554 in = the register pair containing the input value.
28555 out = the destination register pair.
28556 up = the high- or low-part of each pair.
28557 down = the opposite part to "up".
28558 In a shift, we can consider bits to shift from "up"-stream to
28559 "down"-stream, so in a left-shift "up" is the low-part and "down"
28560 is the high-part of each register pair. */
28562 rtx out_up = code == ASHIFT ? out_low : out_high;
28563 rtx out_down = code == ASHIFT ? out_high : out_low;
28564 rtx in_up = code == ASHIFT ? in_low : in_high;
28565 rtx in_down = code == ASHIFT ? in_high : in_low;
28567 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28568 gcc_assert (out
28569 && (REG_P (out) || GET_CODE (out) == SUBREG)
28570 && GET_MODE (out) == DImode);
28571 gcc_assert (in
28572 && (REG_P (in) || GET_CODE (in) == SUBREG)
28573 && GET_MODE (in) == DImode);
28574 gcc_assert (amount
28575 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28576 && GET_MODE (amount) == SImode)
28577 || CONST_INT_P (amount)));
28578 gcc_assert (scratch1 == NULL
28579 || (GET_CODE (scratch1) == SCRATCH)
28580 || (GET_MODE (scratch1) == SImode
28581 && REG_P (scratch1)));
28582 gcc_assert (scratch2 == NULL
28583 || (GET_CODE (scratch2) == SCRATCH)
28584 || (GET_MODE (scratch2) == SImode
28585 && REG_P (scratch2)));
28586 gcc_assert (!REG_P (out) || !REG_P (amount)
28587 || !HARD_REGISTER_P (out)
28588 || (REGNO (out) != REGNO (amount)
28589 && REGNO (out) + 1 != REGNO (amount)));
28591 /* Macros to make following code more readable. */
28592 #define SUB_32(DEST,SRC) \
28593 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28594 #define RSB_32(DEST,SRC) \
28595 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28596 #define SUB_S_32(DEST,SRC) \
28597 gen_addsi3_compare0 ((DEST), (SRC), \
28598 GEN_INT (-32))
28599 #define SET(DEST,SRC) \
28600 gen_rtx_SET (SImode, (DEST), (SRC))
28601 #define SHIFT(CODE,SRC,AMOUNT) \
28602 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28603 #define LSHIFT(CODE,SRC,AMOUNT) \
28604 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28605 SImode, (SRC), (AMOUNT))
28606 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28607 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28608 SImode, (SRC), (AMOUNT))
28609 #define ORR(A,B) \
28610 gen_rtx_IOR (SImode, (A), (B))
28611 #define BRANCH(COND,LABEL) \
28612 gen_arm_cond_branch ((LABEL), \
28613 gen_rtx_ ## COND (CCmode, cc_reg, \
28614 const0_rtx), \
28615 cc_reg)
28617 /* Shifts by register and shifts by constant are handled separately. */
28618 if (CONST_INT_P (amount))
28620 /* We have a shift-by-constant. */
28622 /* First, handle out-of-range shift amounts.
28623 In both cases we try to match the result an ARM instruction in a
28624 shift-by-register would give. This helps reduce execution
28625 differences between optimization levels, but it won't stop other
28626 parts of the compiler doing different things. This is "undefined
28627 behaviour", in any case. */
28628 if (INTVAL (amount) <= 0)
28629 emit_insn (gen_movdi (out, in));
28630 else if (INTVAL (amount) >= 64)
28632 if (code == ASHIFTRT)
28634 rtx const31_rtx = GEN_INT (31);
28635 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28636 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28638 else
28639 emit_insn (gen_movdi (out, const0_rtx));
28642 /* Now handle valid shifts. */
28643 else if (INTVAL (amount) < 32)
28645 /* Shifts by a constant less than 32. */
28646 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28648 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28649 emit_insn (SET (out_down,
28650 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28651 out_down)));
28652 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28654 else
28656 /* Shifts by a constant greater than 31. */
28657 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28659 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28660 if (code == ASHIFTRT)
28661 emit_insn (gen_ashrsi3 (out_up, in_up,
28662 GEN_INT (31)));
28663 else
28664 emit_insn (SET (out_up, const0_rtx));
28667 else
28669 /* We have a shift-by-register. */
28670 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28672 /* This alternative requires the scratch registers. */
28673 gcc_assert (scratch1 && REG_P (scratch1));
28674 gcc_assert (scratch2 && REG_P (scratch2));
28676 /* We will need the values "amount-32" and "32-amount" later.
28677 Swapping them around now allows the later code to be more general. */
28678 switch (code)
28680 case ASHIFT:
28681 emit_insn (SUB_32 (scratch1, amount));
28682 emit_insn (RSB_32 (scratch2, amount));
28683 break;
28684 case ASHIFTRT:
28685 emit_insn (RSB_32 (scratch1, amount));
28686 /* Also set CC = amount > 32. */
28687 emit_insn (SUB_S_32 (scratch2, amount));
28688 break;
28689 case LSHIFTRT:
28690 emit_insn (RSB_32 (scratch1, amount));
28691 emit_insn (SUB_32 (scratch2, amount));
28692 break;
28693 default:
28694 gcc_unreachable ();
28697 /* Emit code like this:
28699 arithmetic-left:
28700 out_down = in_down << amount;
28701 out_down = (in_up << (amount - 32)) | out_down;
28702 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28703 out_up = in_up << amount;
28705 arithmetic-right:
28706 out_down = in_down >> amount;
28707 out_down = (in_up << (32 - amount)) | out_down;
28708 if (amount < 32)
28709 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28710 out_up = in_up << amount;
28712 logical-right:
28713 out_down = in_down >> amount;
28714 out_down = (in_up << (32 - amount)) | out_down;
28715 if (amount < 32)
28716 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28717 out_up = in_up << amount;
28719 The ARM and Thumb2 variants are the same but implemented slightly
28720 differently. If this were only called during expand we could just
28721 use the Thumb2 case and let combine do the right thing, but this
28722 can also be called from post-reload splitters. */
28724 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28726 if (!TARGET_THUMB2)
28728 /* Emit code for ARM mode. */
28729 emit_insn (SET (out_down,
28730 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28731 if (code == ASHIFTRT)
28733 rtx_code_label *done_label = gen_label_rtx ();
28734 emit_jump_insn (BRANCH (LT, done_label));
28735 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28736 out_down)));
28737 emit_label (done_label);
28739 else
28740 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28741 out_down)));
28743 else
28745 /* Emit code for Thumb2 mode.
28746 Thumb2 can't do shift and or in one insn. */
28747 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28748 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28750 if (code == ASHIFTRT)
28752 rtx_code_label *done_label = gen_label_rtx ();
28753 emit_jump_insn (BRANCH (LT, done_label));
28754 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28755 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28756 emit_label (done_label);
28758 else
28760 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28761 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28765 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28768 #undef SUB_32
28769 #undef RSB_32
28770 #undef SUB_S_32
28771 #undef SET
28772 #undef SHIFT
28773 #undef LSHIFT
28774 #undef REV_LSHIFT
28775 #undef ORR
28776 #undef BRANCH
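/* Worked example of the constant-shift path above: a logical right shift
   of a 64-bit value by 8 expands to

     out_low  = in_low >> 8;
     out_low |= in_high << 24;    (the 32 - amount reverse shift)
     out_high = in_high >> 8;

   while amounts in the range [32, 63] shift in_high straight into out_low
   and clear (or sign-fill, for ASHIFTRT) out_high.  */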
28780 /* Returns true if this is a valid comparison operation, and puts
28781 the operands into a form that is valid. */
28782 bool
28783 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28785 enum rtx_code code = GET_CODE (*comparison);
28786 int code_int;
28787 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28788 ? GET_MODE (*op2) : GET_MODE (*op1);
28790 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28792 if (code == UNEQ || code == LTGT)
28793 return false;
28795 code_int = (int)code;
28796 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28797 PUT_CODE (*comparison, (enum rtx_code)code_int);
28799 switch (mode)
28801 case SImode:
28802 if (!arm_add_operand (*op1, mode))
28803 *op1 = force_reg (mode, *op1);
28804 if (!arm_add_operand (*op2, mode))
28805 *op2 = force_reg (mode, *op2);
28806 return true;
28808 case DImode:
28809 if (!cmpdi_operand (*op1, mode))
28810 *op1 = force_reg (mode, *op1);
28811 if (!cmpdi_operand (*op2, mode))
28812 *op2 = force_reg (mode, *op2);
28813 return true;
28815 case SFmode:
28816 case DFmode:
28817 if (!arm_float_compare_operand (*op1, mode))
28818 *op1 = force_reg (mode, *op1);
28819 if (!arm_float_compare_operand (*op2, mode))
28820 *op2 = force_reg (mode, *op2);
28821 return true;
28822 default:
28823 break;
28826 return false;
28830 /* Maximum number of instructions to set block of memory. */
28831 static int
28832 arm_block_set_max_insns (void)
28834 if (optimize_function_for_size_p (cfun))
28835 return 4;
28836 else
28837 return current_tune->max_insns_inline_memset;
28840 /* Return TRUE if it's profitable to set block of memory for
28841 non-vectorized case. VAL is the value to set the memory
28842 with. LENGTH is the number of bytes to set. ALIGN is the
28843 alignment of the destination memory in bytes. UNALIGNED_P
28844 is TRUE if we can only set the memory with instructions
28845 meeting alignment requirements. USE_STRD_P is TRUE if we
28846 can use strd to set the memory. */
28847 static bool
28848 arm_block_set_non_vect_profit_p (rtx val,
28849 unsigned HOST_WIDE_INT length,
28850 unsigned HOST_WIDE_INT align,
28851 bool unaligned_p, bool use_strd_p)
28853 int num = 0;
28854 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28855 strb/strh/str instructions needed to store it. */
28856 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28858 if (unaligned_p)
28860 num = arm_const_inline_cost (SET, val);
28861 num += length / align + length % align;
28863 else if (use_strd_p)
28865 num = arm_const_double_inline_cost (val);
28866 num += (length >> 3) + leftover[length & 7];
28868 else
28870 num = arm_const_inline_cost (SET, val);
28871 num += (length >> 2) + leftover[length & 3];
28874 /* We may be able to combine last pair STRH/STRB into a single STR
28875 by shifting one byte back. */
28876 if (unaligned_access && length > 3 && (length & 3) == 3)
28877 num--;
28879 return (num <= arm_block_set_max_insns ());
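/* Rough cost example: setting 15 bytes of 4-byte-aligned memory costs the
   constant-load instructions plus 3 word stores plus leftover[3] == 2
   trailing stores, minus one when unaligned access lets the final
   STRH/STRB pair be merged into a single STR; the expansion is used only
   if that total stays within arm_block_set_max_insns ().  */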
28882 /* Return TRUE if it's profitable to set block of memory for
28883 vectorized case. LENGTH is the number of bytes to set.
28884 ALIGN is the alignment of destination memory in bytes.
28885 MODE is the vector mode used to set the memory. */
28886 static bool
28887 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28888 unsigned HOST_WIDE_INT align,
28889 machine_mode mode)
28891 int num;
28892 bool unaligned_p = ((align & 3) != 0);
28893 unsigned int nelt = GET_MODE_NUNITS (mode);
28895 /* Instruction loading constant value. */
28896 num = 1;
28897 /* Instructions storing the memory. */
28898 num += (length + nelt - 1) / nelt;
28899 /* Instructions adjusting the address expression. We only need to
28900 adjust the address if the block is 4-byte aligned and the leftover
28901 bytes can only be stored with a misaligned store instruction. */
28902 if (!unaligned_p && (length & 3) != 0)
28903 num++;
28905 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28906 if (!unaligned_p && mode == V16QImode)
28907 num--;
28909 return (num <= arm_block_set_max_insns ());
28912 /* Set a block of memory using vectorization instructions for the
28913 unaligned case. We fill the first LENGTH bytes of the memory
28914 area starting from DSTBASE with byte constant VALUE. ALIGN is
28915 the alignment requirement of memory. Return TRUE if succeeded. */
28916 static bool
28917 arm_block_set_unaligned_vect (rtx dstbase,
28918 unsigned HOST_WIDE_INT length,
28919 unsigned HOST_WIDE_INT value,
28920 unsigned HOST_WIDE_INT align)
28922 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28923 rtx dst, mem;
28924 rtx val_elt, val_vec, reg;
28925 rtx rval[MAX_VECT_LEN];
28926 rtx (*gen_func) (rtx, rtx);
28927 machine_mode mode;
28928 unsigned HOST_WIDE_INT v = value;
28930 gcc_assert ((align & 0x3) != 0);
28931 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28932 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28933 if (length >= nelt_v16)
28935 mode = V16QImode;
28936 gen_func = gen_movmisalignv16qi;
28938 else
28940 mode = V8QImode;
28941 gen_func = gen_movmisalignv8qi;
28943 nelt_mode = GET_MODE_NUNITS (mode);
28944 gcc_assert (length >= nelt_mode);
28945 /* Skip if it isn't profitable. */
28946 if (!arm_block_set_vect_profit_p (length, align, mode))
28947 return false;
28949 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28950 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28952 v = sext_hwi (v, BITS_PER_WORD);
28953 val_elt = GEN_INT (v);
28954 for (j = 0; j < nelt_mode; j++)
28955 rval[j] = val_elt;
28957 reg = gen_reg_rtx (mode);
28958 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28959 /* Emit instruction loading the constant value. */
28960 emit_move_insn (reg, val_vec);
28962 /* Handle nelt_mode bytes in a vector. */
28963 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28965 emit_insn ((*gen_func) (mem, reg));
28966 if (i + 2 * nelt_mode <= length)
28967 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28970 /* If at least nelt_v8 bytes are left over, we must be in
28971 V16QImode. */
28972 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28974 /* Handle (8, 16) bytes leftover. */
28975 if (i + nelt_v8 < length)
28977 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28978 /* We are shifting bytes back, set the alignment accordingly. */
28979 if ((length & 1) != 0 && align >= 2)
28980 set_mem_align (mem, BITS_PER_UNIT);
28982 emit_insn (gen_movmisalignv16qi (mem, reg));
28984 /* Handle (0, 8] bytes leftover. */
28985 else if (i < length && i + nelt_v8 >= length)
28987 if (mode == V16QImode)
28989 reg = gen_lowpart (V8QImode, reg);
28990 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28992 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28993 + (nelt_mode - nelt_v8))));
28994 /* We are shifting bytes back, set the alignment accordingly. */
28995 if ((length & 1) != 0 && align >= 2)
28996 set_mem_align (mem, BITS_PER_UNIT);
28998 emit_insn (gen_movmisalignv8qi (mem, reg));
29001 return true;
29004 /* Set a block of memory using vectorization instructions for the
29005 aligned case. We fill the first LENGTH bytes of the memory area
29006 starting from DSTBASE with byte constant VALUE. ALIGN is the
29007 alignment requirement of memory. Return TRUE if succeeded. */
29008 static bool
29009 arm_block_set_aligned_vect (rtx dstbase,
29010 unsigned HOST_WIDE_INT length,
29011 unsigned HOST_WIDE_INT value,
29012 unsigned HOST_WIDE_INT align)
29014 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29015 rtx dst, addr, mem;
29016 rtx val_elt, val_vec, reg;
29017 rtx rval[MAX_VECT_LEN];
29018 machine_mode mode;
29019 unsigned HOST_WIDE_INT v = value;
29021 gcc_assert ((align & 0x3) == 0);
29022 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29023 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29024 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29025 mode = V16QImode;
29026 else
29027 mode = V8QImode;
29029 nelt_mode = GET_MODE_NUNITS (mode);
29030 gcc_assert (length >= nelt_mode);
29031 /* Skip if it isn't profitable. */
29032 if (!arm_block_set_vect_profit_p (length, align, mode))
29033 return false;
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29037 v = sext_hwi (v, BITS_PER_WORD);
29038 val_elt = GEN_INT (v);
29039 for (j = 0; j < nelt_mode; j++)
29040 rval[j] = val_elt;
29042 reg = gen_reg_rtx (mode);
29043 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29044 /* Emit instruction loading the constant value. */
29045 emit_move_insn (reg, val_vec);
29047 i = 0;
29048 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29049 if (mode == V16QImode)
29051 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29052 emit_insn (gen_movmisalignv16qi (mem, reg));
29053 i += nelt_mode;
29054 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29055 if (i + nelt_v8 < length && i + nelt_v16 > length)
29057 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29058 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29059 /* We are shifting bytes back, set the alignment accordingly. */
29060 if ((length & 0x3) == 0)
29061 set_mem_align (mem, BITS_PER_UNIT * 4);
29062 else if ((length & 0x1) == 0)
29063 set_mem_align (mem, BITS_PER_UNIT * 2);
29064 else
29065 set_mem_align (mem, BITS_PER_UNIT);
29067 emit_insn (gen_movmisalignv16qi (mem, reg));
29068 return true;
29070 /* Fall through for bytes leftover. */
29071 mode = V8QImode;
29072 nelt_mode = GET_MODE_NUNITS (mode);
29073 reg = gen_lowpart (V8QImode, reg);
29076 /* Handle 8 bytes in a vector. */
29077 for (; (i + nelt_mode <= length); i += nelt_mode)
29079 addr = plus_constant (Pmode, dst, i);
29080 mem = adjust_automodify_address (dstbase, mode, addr, i);
29081 emit_move_insn (mem, reg);
29084 /* Handle single word leftover by shifting 4 bytes back. We can
29085 use aligned access for this case. */
29086 if (i + UNITS_PER_WORD == length)
29088 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29089 mem = adjust_automodify_address (dstbase, mode,
29090 addr, i - UNITS_PER_WORD);
29091 /* We are shifting 4 bytes back, set the alignment accordingly. */
29092 if (align > UNITS_PER_WORD)
29093 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29095 emit_move_insn (mem, reg);
29097 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29098 We have to use unaligned access for this case. */
29099 else if (i < length)
29101 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29102 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29103 /* We are shifting bytes back, set the alignment accordingly. */
29104 if ((length & 1) == 0)
29105 set_mem_align (mem, BITS_PER_UNIT * 2);
29106 else
29107 set_mem_align (mem, BITS_PER_UNIT);
29109 emit_insn (gen_movmisalignv8qi (mem, reg));
29112 return true;
29115 /* Set a block of memory using plain strh/strb instructions, using
29116 only the instructions permitted by the alignment ALIGN. We fill the
29117 first LENGTH bytes of the memory area starting from DSTBASE
29118 with byte constant VALUE. ALIGN is the alignment requirement
29119 of memory. */
29120 static bool
29121 arm_block_set_unaligned_non_vect (rtx dstbase,
29122 unsigned HOST_WIDE_INT length,
29123 unsigned HOST_WIDE_INT value,
29124 unsigned HOST_WIDE_INT align)
29126 unsigned int i;
29127 rtx dst, addr, mem;
29128 rtx val_exp, val_reg, reg;
29129 machine_mode mode;
29130 HOST_WIDE_INT v = value;
29132 gcc_assert (align == 1 || align == 2);
29134 if (align == 2)
29135 v |= (value << BITS_PER_UNIT);
29137 v = sext_hwi (v, BITS_PER_WORD);
29138 val_exp = GEN_INT (v);
29139 /* Skip if it isn't profitable. */
29140 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29141 align, true, false))
29142 return false;
29144 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29145 mode = (align == 2 ? HImode : QImode);
29146 val_reg = force_reg (SImode, val_exp);
29147 reg = gen_lowpart (mode, val_reg);
29149 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29151 addr = plus_constant (Pmode, dst, i);
29152 mem = adjust_automodify_address (dstbase, mode, addr, i);
29153 emit_move_insn (mem, reg);
29156 /* Handle single byte leftover. */
29157 if (i + 1 == length)
29159 reg = gen_lowpart (QImode, val_reg);
29160 addr = plus_constant (Pmode, dst, i);
29161 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29162 emit_move_insn (mem, reg);
29163 i++;
29166 gcc_assert (i == length);
29167 return true;
/* Set a block of memory using plain strd/str/strh/strb instructions,
   permitting unaligned stores on processors which support unaligned
   semantics for those instructions.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with the byte constant VALUE.
   ALIGN is the alignment requirement of the memory.  */
static bool
arm_block_set_aligned_non_vect (rtx dstbase,
                                unsigned HOST_WIDE_INT length,
                                unsigned HOST_WIDE_INT value,
                                unsigned HOST_WIDE_INT align)
{
  unsigned int i;
  rtx dst, addr, mem;
  rtx val_exp, val_reg, reg;
  unsigned HOST_WIDE_INT v;
  bool use_strd_p;

  use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
                && TARGET_LDRD && current_tune->prefer_ldrd_strd);

  v = (value | (value << 8) | (value << 16) | (value << 24));
  if (length < UNITS_PER_WORD)
    v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);

  if (use_strd_p)
    v |= (v << BITS_PER_WORD);
  else
    v = sext_hwi (v, BITS_PER_WORD);

  val_exp = GEN_INT (v);
  /* Skip if it isn't profitable.  */
  if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                        align, false, use_strd_p))
    {
      if (!use_strd_p)
	return false;

      /* Try without strd.  */
      v = (v >> BITS_PER_WORD);
      v = sext_hwi (v, BITS_PER_WORD);
      val_exp = GEN_INT (v);
      use_strd_p = false;
      if (!arm_block_set_non_vect_profit_p (val_exp, length,
                                            align, false, use_strd_p))
	return false;
    }

  i = 0;
  dst = copy_addr_to_reg (XEXP (dstbase, 0));
  /* Handle double words using strd if possible.  */
  if (use_strd_p)
    {
      val_reg = force_reg (DImode, val_exp);
      reg = val_reg;
      for (; (i + 8 <= length); i += 8)
	{
	  addr = plus_constant (Pmode, dst, i);
	  mem = adjust_automodify_address (dstbase, DImode, addr, i);
	  emit_move_insn (mem, reg);
	}
    }
  else
    val_reg = force_reg (SImode, val_exp);

  /* Handle words.  */
  reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
  for (; (i + 4 <= length); i += 4)
    {
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, SImode, addr, i);
      if ((align & 3) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storesi (mem, reg));
    }

  /* Merge the last pair of STRH and STRB into a STR if possible.  */
  if (unaligned_access && i > 0 && (i + 3) == length)
    {
      addr = plus_constant (Pmode, dst, i - 1);
      mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
      /* We are shifting one byte back, set the alignment accordingly.  */
      if ((align & 1) == 0)
	set_mem_align (mem, BITS_PER_UNIT);

      /* Most likely this is an unaligned access, and we can't tell at
	 compilation time.  */
      emit_insn (gen_unaligned_storesi (mem, reg));
      return true;
    }

  /* Handle half word leftover.  */
  if (i + 2 <= length)
    {
      reg = gen_lowpart (HImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, HImode, addr, i);
      if ((align & 1) == 0)
	emit_move_insn (mem, reg);
      else
	emit_insn (gen_unaligned_storehi (mem, reg));

      i += 2;
    }

  /* Handle single byte leftover.  */
  if (i + 1 == length)
    {
      reg = gen_lowpart (QImode, val_reg);
      addr = plus_constant (Pmode, dst, i);
      mem = adjust_automodify_address (dstbase, QImode, addr, i);
      emit_move_insn (mem, reg);
    }

  return true;
}

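/* A worked example of the routine above, with hypothetical operands and
   assuming the profitability check accepts it: LENGTH = 15, ALIGN = 4 and
   VALUE = 0xAB on a tuning that does not prefer strd.  V = 0xABABABAB,
   the word loop covers offsets 0, 4 and 8, leaving i = 12 with three
   bytes to go; because unaligned_access is set and i + 3 == LENGTH, the
   halfword/byte tail is merged into one overlapping unaligned str at
   offset 11:

     str  rV, [rD]
     str  rV, [rD, #4]
     str  rV, [rD, #8]
     str  rV, [rD, #11]   @ unaligned, overlaps the previous word  */
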
/* Set a block of memory using vectorization instructions for both
   aligned and unaligned cases.  We fill the first LENGTH bytes of
   the memory area starting from DSTBASE with the byte constant VALUE.
   ALIGN is the alignment requirement of the memory.  */
static bool
arm_block_set_vect (rtx dstbase,
                    unsigned HOST_WIDE_INT length,
                    unsigned HOST_WIDE_INT value,
                    unsigned HOST_WIDE_INT align)
{
  /* Check whether we need to use unaligned store instructions.  */
  if (((align & 3) != 0 || (length & 3) != 0)
      /* Check whether unaligned store instructions are available.  */
      && (!unaligned_access || BYTES_BIG_ENDIAN))
    return false;

  if ((align & 3) == 0)
    return arm_block_set_aligned_vect (dstbase, length, value, align);
  else
    return arm_block_set_unaligned_vect (dstbase, length, value, align);
}

/* Expand a string store operation.  We first try to do it using
   vectorization instructions, then with ARM unaligned access and
   double-word stores if that is profitable.  OPERANDS[0] is the
   destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
   value to initialize the memory with, OPERANDS[3] is the known
   alignment of the destination.  */
bool
arm_gen_setmem (rtx *operands)
{
  rtx dstbase = operands[0];
  unsigned HOST_WIDE_INT length;
  unsigned HOST_WIDE_INT value;
  unsigned HOST_WIDE_INT align;

  if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
    return false;

  length = UINTVAL (operands[1]);
  if (length > 64)
    return false;

  value = (UINTVAL (operands[2]) & 0xFF);
  align = UINTVAL (operands[3]);
  if (TARGET_NEON && length >= 8
      && current_tune->string_ops_prefer_neon
      && arm_block_set_vect (dstbase, length, value, align))
    return true;

  if (!unaligned_access && (align & 3) != 0)
    return arm_block_set_unaligned_non_vect (dstbase, length, value, align);

  return arm_block_set_aligned_non_vect (dstbase, length, value, align);
}

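/* An illustrative call into arm_gen_setmem, with hypothetical operand
   values as they would arrive from the setmem expander: for
   memset (p, 0xAB, 13) with P known to be 4-byte aligned,
   OPERANDS[1] = 13, OPERANDS[2] = 0xAB and OPERANDS[3] = 4.  VALUE is
   masked down to 0xAB and, on a tuning that does not set
   string_ops_prefer_neon, the call falls through to
   arm_block_set_aligned_non_vect.  */
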
/* Implement the TARGET_SCHED_MACRO_FUSION_P hook.  */
static bool
arm_macro_fusion_p (void)
{
  return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
}

/* Implement the TARGET_SCHED_MACRO_FUSION_PAIR_P hook.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  if (!prev_set
      || !curr_set)
    return false;

  if (any_condjump_p (curr))
    return false;

  if (!arm_macro_fusion_p ())
    return false;

  if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
    {
      /* We are trying to fuse
	   movw imm / movt imm
	 instructions as a group that gets scheduled together.  */

      set_dest = SET_DEST (curr_set);

      if (GET_MODE (set_dest) != SImode)
	return false;

      /* We are trying to match:
	   prev (movw) == (set (reg r0) (const_int imm16))
	   curr (movt) == (set (zero_extract (reg r0)
					     (const_int 16)
					     (const_int 16))
			       (const_int imm16_1))
	 or
	   prev (movw) == (set (reg r1)
			       (high (symbol_ref ("SYM"))))
	   curr (movt) == (set (reg r1)
			       (lo_sum (reg r1)
				       (symbol_ref ("SYM"))))  */
      if (GET_CODE (set_dest) == ZERO_EXTRACT)
	{
	  if (CONST_INT_P (SET_SRC (curr_set))
	      && CONST_INT_P (SET_SRC (prev_set))
	      && REG_P (XEXP (set_dest, 0))
	      && REG_P (SET_DEST (prev_set))
	      && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	    return true;
	}
      else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	       && REG_P (SET_DEST (curr_set))
	       && REG_P (SET_DEST (prev_set))
	       && GET_CODE (SET_SRC (prev_set)) == HIGH
	       && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  return false;
}

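/* For illustration, a typical pair accepted above when the active tuning
   enables ARM_FUSE_MOVW_MOVT (registers and symbol are hypothetical):

     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM

   Both SETs target the same SImode register (the HIGH/LO_SUM form of the
   patterns shown in the comment), so the hook returns true and the
   scheduler keeps the two instructions adjacent.  */
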
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
  return (unsigned HOST_WIDE_INT) 1 << 29;
}

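/* With the standard AddressSanitizer mapping, shadow = (addr >> 3) +
   offset.  Using the offset above, a byte at the (hypothetical) address
   0x00010000 is therefore shadowed at
   (0x00010000 >> 3) + 0x20000000 = 0x20002000.  */
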
/* This is a temporary fix for PR60655.  Ideally we need
   to handle most of these cases in the generic part but
   currently we reject minus (..) (sym_ref).  We try to
   ameliorate the case with minus (sym_ref1) (sym_ref2)
   where both symbols are in the same section.  */

static bool
arm_const_not_ok_for_debug_p (rtx p)
{
  tree decl_op0 = NULL;
  tree decl_op1 = NULL;

  if (GET_CODE (p) == MINUS)
    {
      if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
	{
	  decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
	  if (decl_op1
	      && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
	      && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
	    {
	      if ((TREE_CODE (decl_op1) == VAR_DECL
		   || TREE_CODE (decl_op1) == CONST_DECL)
		  && (TREE_CODE (decl_op0) == VAR_DECL
		      || TREE_CODE (decl_op0) == CONST_DECL))
		return (get_variable_section (decl_op1, false)
			!= get_variable_section (decl_op0, false));

	      if (TREE_CODE (decl_op1) == LABEL_DECL
		  && TREE_CODE (decl_op0) == LABEL_DECL)
		return (DECL_CONTEXT (decl_op1)
			!= DECL_CONTEXT (decl_op0));
	    }

	  return true;
	}
    }

  return false;
}

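/* Illustrative behaviour of the predicate above (hypothetical symbols):
   for (minus (symbol_ref "a") (symbol_ref "b")) where both variables are
   placed in the same section, the difference is a link-time constant and
   the function returns false, i.e. the expression is acceptable for
   debug info; if the two symbols end up in different sections, or the
   second operand is a SYMBOL_REF with no attached decl, it returns true
   and the expression is rejected.  */
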
/* Return TRUE if X is a reference to a value in a constant pool.  */
extern bool
arm_is_constant_pool_ref (rtx x)
{
  return (MEM_P (x)
	  && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
}

/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (addr)).  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == REG)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && GET_CODE (XEXP (addr, 0)) == REG
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

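/* For illustration (hypothetical RTL):
   (mem (plus (reg r4) (const_int 8))) yields *BASE = (reg r4) and
   *OFFSET = (const_int 8); a plain (mem (reg r4)) yields
   *BASE = (reg r4) and *OFFSET = (const_int 0); any other address form,
   e.g. a pre/post-modify address, clears both and returns false.  */
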
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
   set to TRUE if it is a load.  Return TRUE if INSN is such an
   instruction, otherwise return FALSE.  */

static bool
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
{
  rtx x, dest, src;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return false;

  src = SET_SRC (x);
  dest = SET_DEST (x);
  if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
    {
      *is_load = false;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
    {
      *is_load = true;
      extract_base_offset_in_addr (src, base, offset);
    }
  else
    return false;

  return (*base != NULL_RTX && *offset != NULL_RTX);
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so
   FUSION_PRI and PRI are only calculated for these instructions.
   For other instructions, FUSION_PRI and PRI are simply set to
   MAX_PRI.  In the future, other kinds of instruction fusion can
   be supported by returning different priorities.

   It's important that irrelevant instructions get the largest
   FUSION_PRI.  */

static void
arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
                           int *fusion_pri, int *pri)
{
  int tmp, off_val;
  bool is_load;
  rtx base, offset;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  if (!fusion_load_store (insn, &base, &offset, &is_load))
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Load goes first.  */
  if (is_load)
    *fusion_pri = tmp - 1;
  else
    *fusion_pri = tmp - 2;

  tmp /= 2;

  /* INSN with smaller base register goes first.  */
  tmp -= ((REGNO (base) & 0xff) << 20);

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

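/* A worked example of the priority calculation above, with a
   hypothetical MAX_PRI of 100 and two loads ldr r0, [r4, #4] and
   ldr r1, [r4, #8]: both are loads, so both receive *FUSION_PRI = 98 and
   the scheduler groups them together; their *PRI values share the same
   base-register term and differ only in the offset term, so the access
   at offset 4 gets the larger priority and is scheduled before the one
   at offset 8, keeping the pair adjacent for possible ldrd formation.  */
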
#include "gt-arm.h"