gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
108 struct four_ints
109 {
110 int i[4];
111 };
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
125 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
126 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
127 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
128 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
129 inline static int thumb1_index_register_rtx_p (rtx, int);
130 static int thumb_far_jump_used_p (void);
131 static bool thumb_force_lr_save (void);
132 static unsigned arm_size_return_regs (void);
133 static bool arm_assemble_integer (rtx, unsigned int, int);
134 static void arm_print_operand (FILE *, rtx, int);
135 static void arm_print_operand_address (FILE *, rtx);
136 static bool arm_print_operand_punct_valid_p (unsigned char code);
137 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
138 static arm_cc get_arm_condition_code (rtx);
139 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
140 static const char *output_multi_immediate (rtx *, const char *, const char *,
141 int, HOST_WIDE_INT);
142 static const char *shift_op (rtx, HOST_WIDE_INT *);
143 static struct machine_function *arm_init_machine_status (void);
144 static void thumb_exit (FILE *, int);
145 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
146 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
147 static Mnode *add_minipool_forward_ref (Mfix *);
148 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
149 static Mnode *add_minipool_backward_ref (Mfix *);
150 static void assign_minipool_offsets (Mfix *);
151 static void arm_print_value (FILE *, rtx);
152 static void dump_minipool (rtx_insn *);
153 static int arm_barrier_cost (rtx_insn *);
154 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
155 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
156 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
157 machine_mode, rtx);
158 static void arm_reorg (void);
159 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
160 static unsigned long arm_compute_save_reg0_reg12_mask (void);
161 static unsigned long arm_compute_save_reg_mask (void);
162 static unsigned long arm_isr_value (tree);
163 static unsigned long arm_compute_func_type (void);
164 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
165 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
168 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
169 #endif
170 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
171 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
172 static int arm_comp_type_attributes (const_tree, const_tree);
173 static void arm_set_default_type_attributes (tree);
174 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
175 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
176 static int optimal_immediate_sequence (enum rtx_code code,
177 unsigned HOST_WIDE_INT val,
178 struct four_ints *return_sequence);
179 static int optimal_immediate_sequence_1 (enum rtx_code code,
180 unsigned HOST_WIDE_INT val,
181 struct four_ints *return_sequence,
182 int i);
183 static int arm_get_strip_length (int);
184 static bool arm_function_ok_for_sibcall (tree, tree);
185 static machine_mode arm_promote_function_mode (const_tree,
186 machine_mode, int *,
187 const_tree, int);
188 static bool arm_return_in_memory (const_tree, const_tree);
189 static rtx arm_function_value (const_tree, const_tree, bool);
190 static rtx arm_libcall_value_1 (machine_mode);
191 static rtx arm_libcall_value (machine_mode, const_rtx);
192 static bool arm_function_value_regno_p (const unsigned int);
193 static void arm_internal_label (FILE *, const char *, unsigned long);
194 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
195 tree);
196 static bool arm_have_conditional_execution (void);
197 static bool arm_cannot_force_const_mem (machine_mode, rtx);
198 static bool arm_legitimate_constant_p (machine_mode, rtx);
199 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
200 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
201 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
202 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
206 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
207 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
208 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
209 static void emit_constant_insn (rtx cond, rtx pattern);
210 static rtx_insn *emit_set_insn (rtx, rtx);
211 static rtx emit_multi_reg_push (unsigned long, unsigned long);
212 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
213 tree, bool);
214 static rtx arm_function_arg (cumulative_args_t, machine_mode,
215 const_tree, bool);
216 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
217 const_tree, bool);
218 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
219 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
220 const_tree);
221 static rtx aapcs_libcall_value (machine_mode);
222 static int aapcs_select_return_coproc (const_tree, const_tree);
224 #ifdef OBJECT_FORMAT_ELF
225 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
226 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
227 #endif
228 #ifndef ARM_PE
229 static void arm_encode_section_info (tree, rtx, int);
230 #endif
232 static void arm_file_end (void);
233 static void arm_file_start (void);
235 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
236 tree, int *, int);
237 static bool arm_pass_by_reference (cumulative_args_t,
238 machine_mode, const_tree, bool);
239 static bool arm_promote_prototypes (const_tree);
240 static bool arm_default_short_enums (void);
241 static bool arm_align_anon_bitfield (void);
242 static bool arm_return_in_msb (const_tree);
243 static bool arm_must_pass_in_stack (machine_mode, const_tree);
244 static bool arm_return_in_memory (const_tree, const_tree);
245 #if ARM_UNWIND_INFO
246 static void arm_unwind_emit (FILE *, rtx_insn *);
247 static bool arm_output_ttype (rtx);
248 static void arm_asm_emit_except_personality (rtx);
249 static void arm_asm_init_sections (void);
250 #endif
251 static rtx arm_dwarf_register_span (rtx);
253 static tree arm_cxx_guard_type (void);
254 static bool arm_cxx_guard_mask_bit (void);
255 static tree arm_get_cookie_size (tree);
256 static bool arm_cookie_has_size (void);
257 static bool arm_cxx_cdtor_returns_this (void);
258 static bool arm_cxx_key_method_may_be_inline (void);
259 static void arm_cxx_determine_class_data_visibility (tree);
260 static bool arm_cxx_class_data_always_comdat (void);
261 static bool arm_cxx_use_aeabi_atexit (void);
262 static void arm_init_libfuncs (void);
263 static tree arm_build_builtin_va_list (void);
264 static void arm_expand_builtin_va_start (tree, rtx);
265 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
266 static void arm_option_override (void);
267 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
268 static bool arm_macro_fusion_p (void);
269 static bool arm_cannot_copy_insn_p (rtx_insn *);
270 static int arm_issue_rate (void);
271 static int arm_first_cycle_multipass_dfa_lookahead (void);
272 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
273 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
274 static bool arm_output_addr_const_extra (FILE *, rtx);
275 static bool arm_allocate_stack_slots_for_args (void);
276 static bool arm_warn_func_return (tree);
277 static const char *arm_invalid_parameter_type (const_tree t);
278 static const char *arm_invalid_return_type (const_tree t);
279 static tree arm_promoted_type (const_tree t);
280 static tree arm_convert_to_type (tree type, tree expr);
281 static bool arm_scalar_mode_supported_p (machine_mode);
282 static bool arm_frame_pointer_required (void);
283 static bool arm_can_eliminate (const int, const int);
284 static void arm_asm_trampoline_template (FILE *);
285 static void arm_trampoline_init (rtx, tree, rtx);
286 static rtx arm_trampoline_adjust_address (rtx);
287 static rtx arm_pic_static_addr (rtx orig, rtx reg);
288 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
289 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool arm_array_mode_supported_p (machine_mode,
292 unsigned HOST_WIDE_INT);
293 static machine_mode arm_preferred_simd_mode (machine_mode);
294 static bool arm_class_likely_spilled_p (reg_class_t);
295 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
296 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
297 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
298 const_tree type,
299 int misalignment,
300 bool is_packed);
301 static void arm_conditional_register_usage (void);
302 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
303 static unsigned int arm_autovectorize_vector_sizes (void);
304 static int arm_default_branch_cost (bool, bool);
305 static int arm_cortex_a5_branch_cost (bool, bool);
306 static int arm_cortex_m_branch_cost (bool, bool);
307 static int arm_cortex_m7_branch_cost (bool, bool);
309 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
310 const unsigned char *sel);
312 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
314 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
315 tree vectype,
316 int misalign ATTRIBUTE_UNUSED);
317 static unsigned arm_add_stmt_cost (void *data, int count,
318 enum vect_cost_for_stmt kind,
319 struct _stmt_vec_info *stmt_info,
320 int misalign,
321 enum vect_cost_model_location where);
323 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
324 bool op0_preserve_value);
325 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
327 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, NULL, false },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, NULL, false },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
343 false },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
346 false },
347 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
348 false },
349 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
350 false },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, NULL, false },
362 { "dllexport", 0, 0, true, false, false, NULL, false },
363 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
364 false },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
367 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
369 false },
370 #endif
371 { NULL, 0, 0, false, false, false, NULL, false }
372 };
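/* Purely illustrative: a minimal sketch of how user code might apply the
   attributes declared above (the function names are hypothetical):

       extern void far_away_helper (void) __attribute__ ((long_call));
       void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
       void reset_stub (void) __attribute__ ((naked));

   "long_call" forces an indirect call sequence for targets that may lie
   outside the direct branch range, "interrupt"/"isr" request the special
   prologue/epilogue handled by arm_handle_isr_attribute, and "naked"
   suppresses prologue/epilogue generation entirely.  */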
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
383 #undef TARGET_LRA_P
384 #define TARGET_LRA_P hook_bool_void_true
386 #undef TARGET_ATTRIBUTE_TABLE
387 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
415 #undef TARGET_OPTION_OVERRIDE
416 #define TARGET_OPTION_OVERRIDE arm_option_override
418 #undef TARGET_COMP_TYPE_ATTRIBUTES
419 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
421 #undef TARGET_SCHED_MACRO_FUSION_P
422 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
424 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
425 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
427 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
428 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
430 #undef TARGET_SCHED_ADJUST_COST
431 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
433 #undef TARGET_SCHED_REORDER
434 #define TARGET_SCHED_REORDER arm_sched_reorder
436 #undef TARGET_REGISTER_MOVE_COST
437 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
439 #undef TARGET_MEMORY_MOVE_COST
440 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
442 #undef TARGET_ENCODE_SECTION_INFO
443 #ifdef ARM_PE
444 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
445 #else
446 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
447 #endif
449 #undef TARGET_STRIP_NAME_ENCODING
450 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
452 #undef TARGET_ASM_INTERNAL_LABEL
453 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
455 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
456 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
458 #undef TARGET_FUNCTION_VALUE
459 #define TARGET_FUNCTION_VALUE arm_function_value
461 #undef TARGET_LIBCALL_VALUE
462 #define TARGET_LIBCALL_VALUE arm_libcall_value
464 #undef TARGET_FUNCTION_VALUE_REGNO_P
465 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
467 #undef TARGET_ASM_OUTPUT_MI_THUNK
468 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
469 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
470 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
472 #undef TARGET_RTX_COSTS
473 #define TARGET_RTX_COSTS arm_rtx_costs
474 #undef TARGET_ADDRESS_COST
475 #define TARGET_ADDRESS_COST arm_address_cost
477 #undef TARGET_SHIFT_TRUNCATION_MASK
478 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
479 #undef TARGET_VECTOR_MODE_SUPPORTED_P
480 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
481 #undef TARGET_ARRAY_MODE_SUPPORTED_P
482 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
483 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
484 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
485 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
486 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
487 arm_autovectorize_vector_sizes
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
492 #undef TARGET_INIT_BUILTINS
493 #define TARGET_INIT_BUILTINS arm_init_builtins
494 #undef TARGET_EXPAND_BUILTIN
495 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
496 #undef TARGET_BUILTIN_DECL
497 #define TARGET_BUILTIN_DECL arm_builtin_decl
499 #undef TARGET_INIT_LIBFUNCS
500 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
504 #undef TARGET_PROMOTE_PROTOTYPES
505 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
506 #undef TARGET_PASS_BY_REFERENCE
507 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
508 #undef TARGET_ARG_PARTIAL_BYTES
509 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
510 #undef TARGET_FUNCTION_ARG
511 #define TARGET_FUNCTION_ARG arm_function_arg
512 #undef TARGET_FUNCTION_ARG_ADVANCE
513 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
514 #undef TARGET_FUNCTION_ARG_BOUNDARY
515 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
520 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
521 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
523 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
524 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
525 #undef TARGET_TRAMPOLINE_INIT
526 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
527 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
528 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
530 #undef TARGET_WARN_FUNC_RETURN
531 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
533 #undef TARGET_DEFAULT_SHORT_ENUMS
534 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
536 #undef TARGET_ALIGN_ANON_BITFIELD
537 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
539 #undef TARGET_NARROW_VOLATILE_BITFIELD
540 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
542 #undef TARGET_CXX_GUARD_TYPE
543 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
545 #undef TARGET_CXX_GUARD_MASK_BIT
546 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
548 #undef TARGET_CXX_GET_COOKIE_SIZE
549 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
551 #undef TARGET_CXX_COOKIE_HAS_SIZE
552 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
554 #undef TARGET_CXX_CDTOR_RETURNS_THIS
555 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
557 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
558 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
560 #undef TARGET_CXX_USE_AEABI_ATEXIT
561 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
563 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
564 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
565 arm_cxx_determine_class_data_visibility
567 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
568 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
570 #undef TARGET_RETURN_IN_MSB
571 #define TARGET_RETURN_IN_MSB arm_return_in_msb
573 #undef TARGET_RETURN_IN_MEMORY
574 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
576 #undef TARGET_MUST_PASS_IN_STACK
577 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
579 #if ARM_UNWIND_INFO
580 #undef TARGET_ASM_UNWIND_EMIT
581 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
583 /* EABI unwinding tables use a different format for the typeinfo tables. */
584 #undef TARGET_ASM_TTYPE
585 #define TARGET_ASM_TTYPE arm_output_ttype
587 #undef TARGET_ARM_EABI_UNWINDER
588 #define TARGET_ARM_EABI_UNWINDER true
590 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
591 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
593 #undef TARGET_ASM_INIT_SECTIONS
594 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
595 #endif /* ARM_UNWIND_INFO */
597 #undef TARGET_DWARF_REGISTER_SPAN
598 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
600 #undef TARGET_CANNOT_COPY_INSN_P
601 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
603 #ifdef HAVE_AS_TLS
604 #undef TARGET_HAVE_TLS
605 #define TARGET_HAVE_TLS true
606 #endif
608 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
609 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
611 #undef TARGET_LEGITIMATE_CONSTANT_P
612 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
614 #undef TARGET_CANNOT_FORCE_CONST_MEM
615 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
617 #undef TARGET_MAX_ANCHOR_OFFSET
618 #define TARGET_MAX_ANCHOR_OFFSET 4095
620 /* The minimum is set such that the total size of the block for a
621 particular anchor (offsets -4088 through +4095) is 4088 + 1 + 4095 = 8184
622 bytes, which is divisible by eight, ensuring natural spacing of anchors. */
623 #undef TARGET_MIN_ANCHOR_OFFSET
624 #define TARGET_MIN_ANCHOR_OFFSET -4088
626 #undef TARGET_SCHED_ISSUE_RATE
627 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
631 arm_first_cycle_multipass_dfa_lookahead
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
635 arm_first_cycle_multipass_dfa_lookahead_guard
637 #undef TARGET_MANGLE_TYPE
638 #define TARGET_MANGLE_TYPE arm_mangle_type
640 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
641 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
643 #undef TARGET_BUILD_BUILTIN_VA_LIST
644 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
645 #undef TARGET_EXPAND_BUILTIN_VA_START
646 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
647 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
648 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
650 #ifdef HAVE_AS_TLS
651 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
652 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
653 #endif
655 #undef TARGET_LEGITIMATE_ADDRESS_P
656 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
658 #undef TARGET_PREFERRED_RELOAD_CLASS
659 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
661 #undef TARGET_INVALID_PARAMETER_TYPE
662 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
664 #undef TARGET_INVALID_RETURN_TYPE
665 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
667 #undef TARGET_PROMOTED_TYPE
668 #define TARGET_PROMOTED_TYPE arm_promoted_type
670 #undef TARGET_CONVERT_TO_TYPE
671 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
673 #undef TARGET_SCALAR_MODE_SUPPORTED_P
674 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
676 #undef TARGET_FRAME_POINTER_REQUIRED
677 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
679 #undef TARGET_CAN_ELIMINATE
680 #define TARGET_CAN_ELIMINATE arm_can_eliminate
682 #undef TARGET_CONDITIONAL_REGISTER_USAGE
683 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
685 #undef TARGET_CLASS_LIKELY_SPILLED_P
686 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
688 #undef TARGET_VECTORIZE_BUILTINS
689 #define TARGET_VECTORIZE_BUILTINS
691 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
692 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
693 arm_builtin_vectorized_function
695 #undef TARGET_VECTOR_ALIGNMENT
696 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
698 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
699 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
700 arm_vector_alignment_reachable
702 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
703 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
704 arm_builtin_support_vector_misalignment
706 #undef TARGET_PREFERRED_RENAME_CLASS
707 #define TARGET_PREFERRED_RENAME_CLASS \
708 arm_preferred_rename_class
710 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
711 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
712 arm_vectorize_vec_perm_const_ok
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
716 arm_builtin_vectorization_cost
717 #undef TARGET_VECTORIZE_ADD_STMT_COST
718 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
720 #undef TARGET_CANONICALIZE_COMPARISON
721 #define TARGET_CANONICALIZE_COMPARISON \
722 arm_canonicalize_comparison
724 #undef TARGET_ASAN_SHADOW_OFFSET
725 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
727 #undef MAX_INSN_PER_IT_BLOCK
728 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
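/* Illustration (assembly sketch): under -mrestrict-it an IT block covers
   a single instruction, e.g.

       it    eq
       moveq r0, #1

   while the unrestricted form may cover up to four, e.g.

       itte  eq
       moveq r0, #1
       addeq r1, r1, #4
       movne r0, #0
 */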
730 #undef TARGET_CAN_USE_DOLOOP_P
731 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
733 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
734 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
736 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
737 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
739 #undef TARGET_SCHED_FUSION_PRIORITY
740 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
742 struct gcc_target targetm = TARGET_INITIALIZER;
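/* Generic code reaches the hooks defined above through this structure;
   for example (sketch):

       cost = targetm.register_move_cost (mode, from_class, to_class);

   resolves to arm_register_move_cost on an ARM target.  */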
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
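/* For example (sketch, hypothetical registers): instead of branching
   around a short sequence,

       cmp   r0, #0
       beq   1f
       add   r1, r1, #1
       add   r2, r2, #1
     1:

   the skipped instructions can be executed conditionally,

       cmp   r0, #0
       addne r1, r1, #1
       addne r2, r2, #1

   provided no more than max_insns_skipped instructions are skipped.  */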
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
766 /* Which floating point hardware to use. */
767 const struct arm_fpu_desc *arm_fpu_desc;
769 /* Used for Thumb call_via trampolines. */
770 rtx thumb_call_via_label[14];
771 static int thumb_call_reg_needed;
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack, it's intended to help work around
858 problems in GLD which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if chip disallows volatile memory access in IT block. */
870 int arm_arch_no_volatile_ce;
872 /* Nonzero if we should use Neon to handle 64-bit operations rather
873 than core registers. */
874 int prefer_neon_for_64bits = 0;
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 machine_mode output_memory_reference_mode;
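/* Sketch of the case described above (hypothetical operands): for a
   post-increment store such as

       str r0, [r1], #4

   the address printer needs the mode of the access (SImode here) to know
   that the pointer advances by 4 bytes; TARGET_PRINT_OPERAND records that
   mode in output_memory_reference_mode for TARGET_PRINT_OPERAND_ADDRESS.  */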
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
907 /* Nonzero if the core has a very small, high-latency, multiply unit. */
908 int arm_m_profile_small_mul = 0;
910 /* The condition codes of the ARM, and the inverse function. */
911 static const char * const arm_condition_codes[] =
912 {
913 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
914 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
915 };
917 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
918 int arm_regs_in_sequence[] =
919 {
920 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
921 };
923 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
924 #define streq(string1, string2) (strcmp (string1, string2) == 0)
926 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
927 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
928 | (1 << PIC_OFFSET_TABLE_REGNUM)))
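/* Worked example, assuming the usual numbering with the Thumb hard frame
   pointer in r7 and the PIC register in r9: SP (r13), PC (r15) and r9 lie
   outside the low-register byte anyway, so the mask reduces to
   0xff & ~(1 << 7) = 0x7f, i.e. r0-r6.  */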
930 /* Initialization code. */
932 struct processors
933 {
934 const char *const name;
935 enum processor_type core;
936 const char *arch;
937 enum base_architecture base_arch;
938 const unsigned long flags;
939 const struct tune_params *const tune;
940 };
943 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
944 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
945 prefetch_slots, \
946 l1_size, \
947 l1_line_size
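/* Hypothetical use of the second macro, purely for illustration: a core
   with 4 prefetch slots, a 16K L1 cache and 64-byte lines would fill in
   its tune_params prefetch fields with

       ARM_PREFETCH_BENEFICIAL (4, 16384, 64)

   which expands to the three comma-separated values 4, 16384, 64.  */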
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
967 #include "aarch-cost-tables.h"
971 const struct cpu_cost_table cortexa9_extra_costs =
973 /* ALU */
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
993 /* MULT SImode */
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1002 /* MULT DImode */
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1012 /* LD/ST */
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1033 /* FP SFmode */
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1049 /* FP DFmode */
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1066 /* Vector */
1068 COSTS_N_INSNS (1) /* alu. */
1072 const struct cpu_cost_table cortexa8_extra_costs =
1074 /* ALU */
1076 0, /* arith. */
1077 0, /* logical. */
1078 COSTS_N_INSNS (1), /* shift. */
1079 0, /* shift_reg. */
1080 COSTS_N_INSNS (1), /* arith_shift. */
1081 0, /* arith_shift_reg. */
1082 COSTS_N_INSNS (1), /* log_shift. */
1083 0, /* log_shift_reg. */
1084 0, /* extend. */
1085 0, /* extend_arith. */
1086 0, /* bfi. */
1087 0, /* bfx. */
1088 0, /* clz. */
1089 0, /* rev. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1094 /* MULT SImode */
1096 COSTS_N_INSNS (1), /* simple. */
1097 COSTS_N_INSNS (1), /* flag_setting. */
1098 COSTS_N_INSNS (1), /* extend. */
1099 COSTS_N_INSNS (1), /* add. */
1100 COSTS_N_INSNS (1), /* extend_add. */
1101 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1103 /* MULT DImode */
1105 0, /* simple (N/A). */
1106 0, /* flag_setting (N/A). */
1107 COSTS_N_INSNS (2), /* extend. */
1108 0, /* add (N/A). */
1109 COSTS_N_INSNS (2), /* extend_add. */
1110 0 /* idiv (N/A). */
1113 /* LD/ST */
1115 COSTS_N_INSNS (1), /* load. */
1116 COSTS_N_INSNS (1), /* load_sign_extend. */
1117 COSTS_N_INSNS (1), /* ldrd. */
1118 COSTS_N_INSNS (1), /* ldm_1st. */
1119 1, /* ldm_regs_per_insn_1st. */
1120 2, /* ldm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* loadf. */
1122 COSTS_N_INSNS (1), /* loadd. */
1123 COSTS_N_INSNS (1), /* load_unaligned. */
1124 COSTS_N_INSNS (1), /* store. */
1125 COSTS_N_INSNS (1), /* strd. */
1126 COSTS_N_INSNS (1), /* stm_1st. */
1127 1, /* stm_regs_per_insn_1st. */
1128 2, /* stm_regs_per_insn_subsequent. */
1129 COSTS_N_INSNS (1), /* storef. */
1130 COSTS_N_INSNS (1), /* stored. */
1131 COSTS_N_INSNS (1) /* store_unaligned. */
1134 /* FP SFmode */
1136 COSTS_N_INSNS (36), /* div. */
1137 COSTS_N_INSNS (11), /* mult. */
1138 COSTS_N_INSNS (20), /* mult_addsub. */
1139 COSTS_N_INSNS (30), /* fma. */
1140 COSTS_N_INSNS (9), /* addsub. */
1141 COSTS_N_INSNS (3), /* fpconst. */
1142 COSTS_N_INSNS (3), /* neg. */
1143 COSTS_N_INSNS (6), /* compare. */
1144 COSTS_N_INSNS (4), /* widen. */
1145 COSTS_N_INSNS (4), /* narrow. */
1146 COSTS_N_INSNS (8), /* toint. */
1147 COSTS_N_INSNS (8), /* fromint. */
1148 COSTS_N_INSNS (8) /* roundint. */
1150 /* FP DFmode */
1152 COSTS_N_INSNS (64), /* div. */
1153 COSTS_N_INSNS (16), /* mult. */
1154 COSTS_N_INSNS (25), /* mult_addsub. */
1155 COSTS_N_INSNS (30), /* fma. */
1156 COSTS_N_INSNS (9), /* addsub. */
1157 COSTS_N_INSNS (3), /* fpconst. */
1158 COSTS_N_INSNS (3), /* neg. */
1159 COSTS_N_INSNS (6), /* compare. */
1160 COSTS_N_INSNS (6), /* widen. */
1161 COSTS_N_INSNS (6), /* narrow. */
1162 COSTS_N_INSNS (8), /* toint. */
1163 COSTS_N_INSNS (8), /* fromint. */
1164 COSTS_N_INSNS (8) /* roundint. */
1167 /* Vector */
1169 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa5_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (6), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (4), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1276 const struct cpu_cost_table cortexa7_extra_costs =
1278 /* ALU */
1280 0, /* arith. */
1281 0, /* logical. */
1282 COSTS_N_INSNS (1), /* shift. */
1283 COSTS_N_INSNS (1), /* shift_reg. */
1284 COSTS_N_INSNS (1), /* arith_shift. */
1285 COSTS_N_INSNS (1), /* arith_shift_reg. */
1286 COSTS_N_INSNS (1), /* log_shift. */
1287 COSTS_N_INSNS (1), /* log_shift_reg. */
1288 COSTS_N_INSNS (1), /* extend. */
1289 COSTS_N_INSNS (1), /* extend_arith. */
1290 COSTS_N_INSNS (1), /* bfi. */
1291 COSTS_N_INSNS (1), /* bfx. */
1292 COSTS_N_INSNS (1), /* clz. */
1293 COSTS_N_INSNS (1), /* rev. */
1294 0, /* non_exec. */
1295 true /* non_exec_costs_exec. */
1299 /* MULT SImode */
1301 0, /* simple. */
1302 COSTS_N_INSNS (1), /* flag_setting. */
1303 COSTS_N_INSNS (1), /* extend. */
1304 COSTS_N_INSNS (1), /* add. */
1305 COSTS_N_INSNS (1), /* extend_add. */
1306 COSTS_N_INSNS (7) /* idiv. */
1308 /* MULT DImode */
1310 0, /* simple (N/A). */
1311 0, /* flag_setting (N/A). */
1312 COSTS_N_INSNS (1), /* extend. */
1313 0, /* add. */
1314 COSTS_N_INSNS (2), /* extend_add. */
1315 0 /* idiv (N/A). */
1318 /* LD/ST */
1320 COSTS_N_INSNS (1), /* load. */
1321 COSTS_N_INSNS (1), /* load_sign_extend. */
1322 COSTS_N_INSNS (3), /* ldrd. */
1323 COSTS_N_INSNS (1), /* ldm_1st. */
1324 1, /* ldm_regs_per_insn_1st. */
1325 2, /* ldm_regs_per_insn_subsequent. */
1326 COSTS_N_INSNS (2), /* loadf. */
1327 COSTS_N_INSNS (2), /* loadd. */
1328 COSTS_N_INSNS (1), /* load_unaligned. */
1329 COSTS_N_INSNS (1), /* store. */
1330 COSTS_N_INSNS (3), /* strd. */
1331 COSTS_N_INSNS (1), /* stm_1st. */
1332 1, /* stm_regs_per_insn_1st. */
1333 2, /* stm_regs_per_insn_subsequent. */
1334 COSTS_N_INSNS (2), /* storef. */
1335 COSTS_N_INSNS (2), /* stored. */
1336 COSTS_N_INSNS (1) /* store_unaligned. */
1339 /* FP SFmode */
1341 COSTS_N_INSNS (15), /* div. */
1342 COSTS_N_INSNS (3), /* mult. */
1343 COSTS_N_INSNS (7), /* mult_addsub. */
1344 COSTS_N_INSNS (7), /* fma. */
1345 COSTS_N_INSNS (3), /* addsub. */
1346 COSTS_N_INSNS (3), /* fpconst. */
1347 COSTS_N_INSNS (3), /* neg. */
1348 COSTS_N_INSNS (3), /* compare. */
1349 COSTS_N_INSNS (3), /* widen. */
1350 COSTS_N_INSNS (3), /* narrow. */
1351 COSTS_N_INSNS (3), /* toint. */
1352 COSTS_N_INSNS (3), /* fromint. */
1353 COSTS_N_INSNS (3) /* roundint. */
1355 /* FP DFmode */
1357 COSTS_N_INSNS (30), /* div. */
1358 COSTS_N_INSNS (6), /* mult. */
1359 COSTS_N_INSNS (10), /* mult_addsub. */
1360 COSTS_N_INSNS (7), /* fma. */
1361 COSTS_N_INSNS (3), /* addsub. */
1362 COSTS_N_INSNS (3), /* fpconst. */
1363 COSTS_N_INSNS (3), /* neg. */
1364 COSTS_N_INSNS (3), /* compare. */
1365 COSTS_N_INSNS (3), /* widen. */
1366 COSTS_N_INSNS (3), /* narrow. */
1367 COSTS_N_INSNS (3), /* toint. */
1368 COSTS_N_INSNS (3), /* fromint. */
1369 COSTS_N_INSNS (3) /* roundint. */
1372 /* Vector */
1374 COSTS_N_INSNS (1) /* alu. */
1378 const struct cpu_cost_table cortexa12_extra_costs =
1380 /* ALU */
1382 0, /* arith. */
1383 0, /* logical. */
1384 0, /* shift. */
1385 COSTS_N_INSNS (1), /* shift_reg. */
1386 COSTS_N_INSNS (1), /* arith_shift. */
1387 COSTS_N_INSNS (1), /* arith_shift_reg. */
1388 COSTS_N_INSNS (1), /* log_shift. */
1389 COSTS_N_INSNS (1), /* log_shift_reg. */
1390 0, /* extend. */
1391 COSTS_N_INSNS (1), /* extend_arith. */
1392 0, /* bfi. */
1393 COSTS_N_INSNS (1), /* bfx. */
1394 COSTS_N_INSNS (1), /* clz. */
1395 COSTS_N_INSNS (1), /* rev. */
1396 0, /* non_exec. */
1397 true /* non_exec_costs_exec. */
1399 /* MULT SImode */
1402 COSTS_N_INSNS (2), /* simple. */
1403 COSTS_N_INSNS (3), /* flag_setting. */
1404 COSTS_N_INSNS (2), /* extend. */
1405 COSTS_N_INSNS (3), /* add. */
1406 COSTS_N_INSNS (2), /* extend_add. */
1407 COSTS_N_INSNS (18) /* idiv. */
1409 /* MULT DImode */
1411 0, /* simple (N/A). */
1412 0, /* flag_setting (N/A). */
1413 COSTS_N_INSNS (3), /* extend. */
1414 0, /* add (N/A). */
1415 COSTS_N_INSNS (3), /* extend_add. */
1416 0 /* idiv (N/A). */
1419 /* LD/ST */
1421 COSTS_N_INSNS (3), /* load. */
1422 COSTS_N_INSNS (3), /* load_sign_extend. */
1423 COSTS_N_INSNS (3), /* ldrd. */
1424 COSTS_N_INSNS (3), /* ldm_1st. */
1425 1, /* ldm_regs_per_insn_1st. */
1426 2, /* ldm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (3), /* loadf. */
1428 COSTS_N_INSNS (3), /* loadd. */
1429 0, /* load_unaligned. */
1430 0, /* store. */
1431 0, /* strd. */
1432 0, /* stm_1st. */
1433 1, /* stm_regs_per_insn_1st. */
1434 2, /* stm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (2), /* storef. */
1436 COSTS_N_INSNS (2), /* stored. */
1437 0 /* store_unaligned. */
1440 /* FP SFmode */
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1456 /* FP DFmode */
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1473 /* Vector */
1475 COSTS_N_INSNS (1) /* alu. */
1479 const struct cpu_cost_table cortexa15_extra_costs =
1481 /* ALU */
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1500 /* MULT SImode */
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1510 /* MULT DImode */
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1520 /* LD/ST */
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0 /* store_unaligned. */
1541 /* FP SFmode */
1543 COSTS_N_INSNS (17), /* div. */
1544 COSTS_N_INSNS (4), /* mult. */
1545 COSTS_N_INSNS (8), /* mult_addsub. */
1546 COSTS_N_INSNS (8), /* fma. */
1547 COSTS_N_INSNS (4), /* addsub. */
1548 COSTS_N_INSNS (2), /* fpconst. */
1549 COSTS_N_INSNS (2), /* neg. */
1550 COSTS_N_INSNS (5), /* compare. */
1551 COSTS_N_INSNS (4), /* widen. */
1552 COSTS_N_INSNS (4), /* narrow. */
1553 COSTS_N_INSNS (4), /* toint. */
1554 COSTS_N_INSNS (4), /* fromint. */
1555 COSTS_N_INSNS (4) /* roundint. */
1557 /* FP DFmode */
1559 COSTS_N_INSNS (31), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (2), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1574 /* Vector */
1576 COSTS_N_INSNS (1) /* alu. */
1580 const struct cpu_cost_table v7m_extra_costs =
1582 /* ALU */
1584 0, /* arith. */
1585 0, /* logical. */
1586 0, /* shift. */
1587 0, /* shift_reg. */
1588 0, /* arith_shift. */
1589 COSTS_N_INSNS (1), /* arith_shift_reg. */
1590 0, /* log_shift. */
1591 COSTS_N_INSNS (1), /* log_shift_reg. */
1592 0, /* extend. */
1593 COSTS_N_INSNS (1), /* extend_arith. */
1594 0, /* bfi. */
1595 0, /* bfx. */
1596 0, /* clz. */
1597 0, /* rev. */
1598 COSTS_N_INSNS (1), /* non_exec. */
1599 false /* non_exec_costs_exec. */
1602 /* MULT SImode */
1604 COSTS_N_INSNS (1), /* simple. */
1605 COSTS_N_INSNS (1), /* flag_setting. */
1606 COSTS_N_INSNS (2), /* extend. */
1607 COSTS_N_INSNS (1), /* add. */
1608 COSTS_N_INSNS (3), /* extend_add. */
1609 COSTS_N_INSNS (8) /* idiv. */
1611 /* MULT DImode */
1613 0, /* simple (N/A). */
1614 0, /* flag_setting (N/A). */
1615 COSTS_N_INSNS (2), /* extend. */
1616 0, /* add (N/A). */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 0 /* idiv (N/A). */
1621 /* LD/ST */
1623 COSTS_N_INSNS (2), /* load. */
1624 0, /* load_sign_extend. */
1625 COSTS_N_INSNS (3), /* ldrd. */
1626 COSTS_N_INSNS (2), /* ldm_1st. */
1627 1, /* ldm_regs_per_insn_1st. */
1628 1, /* ldm_regs_per_insn_subsequent. */
1629 COSTS_N_INSNS (2), /* loadf. */
1630 COSTS_N_INSNS (3), /* loadd. */
1631 COSTS_N_INSNS (1), /* load_unaligned. */
1632 COSTS_N_INSNS (2), /* store. */
1633 COSTS_N_INSNS (3), /* strd. */
1634 COSTS_N_INSNS (2), /* stm_1st. */
1635 1, /* stm_regs_per_insn_1st. */
1636 1, /* stm_regs_per_insn_subsequent. */
1637 COSTS_N_INSNS (2), /* storef. */
1638 COSTS_N_INSNS (3), /* stored. */
1639 COSTS_N_INSNS (1) /* store_unaligned. */
1642 /* FP SFmode */
1644 COSTS_N_INSNS (7), /* div. */
1645 COSTS_N_INSNS (2), /* mult. */
1646 COSTS_N_INSNS (5), /* mult_addsub. */
1647 COSTS_N_INSNS (3), /* fma. */
1648 COSTS_N_INSNS (1), /* addsub. */
1649 0, /* fpconst. */
1650 0, /* neg. */
1651 0, /* compare. */
1652 0, /* widen. */
1653 0, /* narrow. */
1654 0, /* toint. */
1655 0, /* fromint. */
1656 0 /* roundint. */
1658 /* FP DFmode */
1660 COSTS_N_INSNS (15), /* div. */
1661 COSTS_N_INSNS (5), /* mult. */
1662 COSTS_N_INSNS (7), /* mult_addsub. */
1663 COSTS_N_INSNS (7), /* fma. */
1664 COSTS_N_INSNS (3), /* addsub. */
1665 0, /* fpconst. */
1666 0, /* neg. */
1667 0, /* compare. */
1668 0, /* widen. */
1669 0, /* narrow. */
1670 0, /* toint. */
1671 0, /* fromint. */
1672 0 /* roundint. */
1675 /* Vector */
1677 COSTS_N_INSNS (1) /* alu. */
1681 #define ARM_FUSE_NOTHING (0)
1682 #define ARM_FUSE_MOVW_MOVT (1 << 0)
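/* The pair targeted by ARM_FUSE_MOVW_MOVT is the usual two-instruction
   materialisation of a 32-bit constant or address (sketch, hypothetical
   symbol):

       movw r0, #:lower16:some_symbol
       movt r0, #:upper16:some_symbol

   A tuning that sets this bit asks the scheduler (via
   aarch_macro_fusion_pair_p) to keep such a pair adjacent so that the
   core can fuse it.  */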
1684 const struct tune_params arm_slowmul_tune =
1686 arm_slowmul_rtx_costs,
1687 NULL,
1688 NULL, /* Sched adj cost. */
1689 3, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 true, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false, /* Prefer 32-bit encodings. */
1699 false, /* Prefer Neon for stringops. */
1700 8, /* Maximum insns to inline memset. */
1701 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1702 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1703 1 /* Issue rate. */
1706 const struct tune_params arm_fastmul_tune =
1708 arm_fastmul_rtx_costs,
1709 NULL,
1710 NULL, /* Sched adj cost. */
1711 1, /* Constant limit. */
1712 5, /* Max cond insns. */
1713 ARM_PREFETCH_NOT_BENEFICIAL,
1714 true, /* Prefer constant pool. */
1715 arm_default_branch_cost,
1716 false, /* Prefer LDRD/STRD. */
1717 {true, true}, /* Prefer non short circuit. */
1718 &arm_default_vec_cost, /* Vectorizer costs. */
1719 false, /* Prefer Neon for 64-bits bitops. */
1720 false, false, /* Prefer 32-bit encodings. */
1721 false, /* Prefer Neon for stringops. */
1722 8, /* Maximum insns to inline memset. */
1723 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1724 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1725 1 /* Issue rate. */
1728 /* StrongARM has early execution of branches, so a sequence that is worth
1729 skipping is shorter. Set max_insns_skipped to a lower value. */
1731 const struct tune_params arm_strongarm_tune =
1733 arm_fastmul_rtx_costs,
1734 NULL,
1735 NULL, /* Sched adj cost. */
1736 1, /* Constant limit. */
1737 3, /* Max cond insns. */
1738 ARM_PREFETCH_NOT_BENEFICIAL,
1739 true, /* Prefer constant pool. */
1740 arm_default_branch_cost,
1741 false, /* Prefer LDRD/STRD. */
1742 {true, true}, /* Prefer non short circuit. */
1743 &arm_default_vec_cost, /* Vectorizer costs. */
1744 false, /* Prefer Neon for 64-bits bitops. */
1745 false, false, /* Prefer 32-bit encodings. */
1746 false, /* Prefer Neon for stringops. */
1747 8, /* Maximum insns to inline memset. */
1748 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1749 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1750 1 /* Issue rate. */
1753 const struct tune_params arm_xscale_tune =
1755 arm_xscale_rtx_costs,
1756 NULL,
1757 xscale_sched_adjust_cost,
1758 2, /* Constant limit. */
1759 3, /* Max cond insns. */
1760 ARM_PREFETCH_NOT_BENEFICIAL,
1761 true, /* Prefer constant pool. */
1762 arm_default_branch_cost,
1763 false, /* Prefer LDRD/STRD. */
1764 {true, true}, /* Prefer non short circuit. */
1765 &arm_default_vec_cost, /* Vectorizer costs. */
1766 false, /* Prefer Neon for 64-bits bitops. */
1767 false, false, /* Prefer 32-bit encodings. */
1768 false, /* Prefer Neon for stringops. */
1769 8, /* Maximum insns to inline memset. */
1770 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1771 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1772 1 /* Issue rate. */
1775 const struct tune_params arm_9e_tune =
1777 arm_9e_rtx_costs,
1778 NULL,
1779 NULL, /* Sched adj cost. */
1780 1, /* Constant limit. */
1781 5, /* Max cond insns. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 true, /* Prefer constant pool. */
1784 arm_default_branch_cost,
1785 false, /* Prefer LDRD/STRD. */
1786 {true, true}, /* Prefer non short circuit. */
1787 &arm_default_vec_cost, /* Vectorizer costs. */
1788 false, /* Prefer Neon for 64-bits bitops. */
1789 false, false, /* Prefer 32-bit encodings. */
1790 false, /* Prefer Neon for stringops. */
1791 8, /* Maximum insns to inline memset. */
1792 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1793 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1794 1 /* Issue rate. */
1797 const struct tune_params arm_marvell_pj4_tune =
1799 arm_9e_rtx_costs,
1800 NULL,
1801 NULL, /* Sched adj cost. */
1802 1, /* Constant limit. */
1803 5, /* Max cond insns. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 true, /* Prefer constant pool. */
1806 arm_default_branch_cost,
1807 false, /* Prefer LDRD/STRD. */
1808 {true, true}, /* Prefer non short circuit. */
1809 &arm_default_vec_cost, /* Vectorizer costs. */
1810 false, /* Prefer Neon for 64-bits bitops. */
1811 false, false, /* Prefer 32-bit encodings. */
1812 false, /* Prefer Neon for stringops. */
1813 8, /* Maximum insns to inline memset. */
1814 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1815 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1816 2 /* Issue rate. */
1819 const struct tune_params arm_v6t2_tune =
1821 arm_9e_rtx_costs,
1822 NULL,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 5, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 false, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 false, false, /* Prefer 32-bit encodings. */
1834 false, /* Prefer Neon for stringops. */
1835 8, /* Maximum insns to inline memset. */
1836 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1837 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1838 1 /* Issue rate. */
1842 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1843 const struct tune_params arm_cortex_tune =
1845 arm_9e_rtx_costs,
1846 &generic_extra_costs,
1847 NULL, /* Sched adj cost. */
1848 1, /* Constant limit. */
1849 5, /* Max cond insns. */
1850 ARM_PREFETCH_NOT_BENEFICIAL,
1851 false, /* Prefer constant pool. */
1852 arm_default_branch_cost,
1853 false, /* Prefer LDRD/STRD. */
1854 {true, true}, /* Prefer non short circuit. */
1855 &arm_default_vec_cost, /* Vectorizer costs. */
1856 false, /* Prefer Neon for 64-bits bitops. */
1857 false, false, /* Prefer 32-bit encodings. */
1858 false, /* Prefer Neon for stringops. */
1859 8, /* Maximum insns to inline memset. */
1860 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1861 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1862 2 /* Issue rate. */
1865 const struct tune_params arm_cortex_a8_tune =
1867 arm_9e_rtx_costs,
1868 &cortexa8_extra_costs,
1869 NULL, /* Sched adj cost. */
1870 1, /* Constant limit. */
1871 5, /* Max cond insns. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 false, /* Prefer constant pool. */
1874 arm_default_branch_cost,
1875 false, /* Prefer LDRD/STRD. */
1876 {true, true}, /* Prefer non short circuit. */
1877 &arm_default_vec_cost, /* Vectorizer costs. */
1878 false, /* Prefer Neon for 64-bits bitops. */
1879 false, false, /* Prefer 32-bit encodings. */
1880 true, /* Prefer Neon for stringops. */
1881 8, /* Maximum insns to inline memset. */
1882 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1883 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1884 2 /* Issue rate. */
1887 const struct tune_params arm_cortex_a7_tune =
1889 arm_9e_rtx_costs,
1890 &cortexa7_extra_costs,
1891 NULL,
1892 1, /* Constant limit. */
1893 5, /* Max cond insns. */
1894 ARM_PREFETCH_NOT_BENEFICIAL,
1895 false, /* Prefer constant pool. */
1896 arm_default_branch_cost,
1897 false, /* Prefer LDRD/STRD. */
1898 {true, true}, /* Prefer non short circuit. */
1899 &arm_default_vec_cost, /* Vectorizer costs. */
1900 false, /* Prefer Neon for 64-bits bitops. */
1901 false, false, /* Prefer 32-bit encodings. */
1902 true, /* Prefer Neon for stringops. */
1903 8, /* Maximum insns to inline memset. */
1904 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1905 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1906 2 /* Issue rate. */
1909 const struct tune_params arm_cortex_a15_tune =
1911 arm_9e_rtx_costs,
1912 &cortexa15_extra_costs,
1913 NULL, /* Sched adj cost. */
1914 1, /* Constant limit. */
1915 2, /* Max cond insns. */
1916 ARM_PREFETCH_NOT_BENEFICIAL,
1917 false, /* Prefer constant pool. */
1918 arm_default_branch_cost,
1919 true, /* Prefer LDRD/STRD. */
1920 {true, true}, /* Prefer non short circuit. */
1921 &arm_default_vec_cost, /* Vectorizer costs. */
1922 false, /* Prefer Neon for 64-bits bitops. */
1923 true, true, /* Prefer 32-bit encodings. */
1924 true, /* Prefer Neon for stringops. */
1925 8, /* Maximum insns to inline memset. */
1926 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1927 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1928 3 /* Issue rate. */
1931 const struct tune_params arm_cortex_a53_tune =
1933 arm_9e_rtx_costs,
1934 &cortexa53_extra_costs,
1935 NULL, /* Scheduler cost adjustment. */
1936 1, /* Constant limit. */
1937 5, /* Max cond insns. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 false, /* Prefer constant pool. */
1940 arm_default_branch_cost,
1941 false, /* Prefer LDRD/STRD. */
1942 {true, true}, /* Prefer non short circuit. */
1943 &arm_default_vec_cost, /* Vectorizer costs. */
1944 false, /* Prefer Neon for 64-bits bitops. */
1945 false, false, /* Prefer 32-bit encodings. */
1946 true, /* Prefer Neon for stringops. */
1947 8, /* Maximum insns to inline memset. */
1948 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1949 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1950 2 /* Issue rate. */
1953 const struct tune_params arm_cortex_a57_tune =
1955 arm_9e_rtx_costs,
1956 &cortexa57_extra_costs,
1957 NULL, /* Scheduler cost adjustment. */
1958 1, /* Constant limit. */
1959 2, /* Max cond insns. */
1960 ARM_PREFETCH_NOT_BENEFICIAL,
1961 false, /* Prefer constant pool. */
1962 arm_default_branch_cost,
1963 true, /* Prefer LDRD/STRD. */
1964 {true, true}, /* Prefer non short circuit. */
1965 &arm_default_vec_cost, /* Vectorizer costs. */
1966 false, /* Prefer Neon for 64-bits bitops. */
1967 true, true, /* Prefer 32-bit encodings. */
1968 true, /* Prefer Neon for stringops. */
1969 8, /* Maximum insns to inline memset. */
1970 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
1971 ARM_SCHED_AUTOPREF_FULL, /* Sched L2 autopref. */
1972 3 /* Issue rate. */
1975 const struct tune_params arm_xgene1_tune =
1977 arm_9e_rtx_costs,
1978 &xgene1_extra_costs,
1979 NULL, /* Scheduler cost adjustment. */
1980 1, /* Constant limit. */
1981 2, /* Max cond insns. */
1982 ARM_PREFETCH_NOT_BENEFICIAL,
1983 false, /* Prefer constant pool. */
1984 arm_default_branch_cost,
1985 true, /* Prefer LDRD/STRD. */
1986 {true, true}, /* Prefer non short circuit. */
1987 &arm_default_vec_cost, /* Vectorizer costs. */
1988 false, /* Prefer Neon for 64-bits bitops. */
1989 true, true, /* Prefer 32-bit encodings. */
1990 false, /* Prefer Neon for stringops. */
1991 32, /* Maximum insns to inline memset. */
1992 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
1993 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
1994 4 /* Issue rate. */
1997 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1998 less appealing. Set max_insns_skipped to a low value. */
2000 const struct tune_params arm_cortex_a5_tune =
2002 arm_9e_rtx_costs,
2003 &cortexa5_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 1, /* Constant limit. */
2006 1, /* Max cond insns. */
2007 ARM_PREFETCH_NOT_BENEFICIAL,
2008 false, /* Prefer constant pool. */
2009 arm_cortex_a5_branch_cost,
2010 false, /* Prefer LDRD/STRD. */
2011 {false, false}, /* Prefer non short circuit. */
2012 &arm_default_vec_cost, /* Vectorizer costs. */
2013 false, /* Prefer Neon for 64-bits bitops. */
2014 false, false, /* Prefer 32-bit encodings. */
2015 true, /* Prefer Neon for stringops. */
2016 8, /* Maximum insns to inline memset. */
2017 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2018 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2019 2 /* Issue rate. */
2022 const struct tune_params arm_cortex_a9_tune =
2024 arm_9e_rtx_costs,
2025 &cortexa9_extra_costs,
2026 cortex_a9_sched_adjust_cost,
2027 1, /* Constant limit. */
2028 5, /* Max cond insns. */
2029 ARM_PREFETCH_BENEFICIAL(4,32,32),
2030 false, /* Prefer constant pool. */
2031 arm_default_branch_cost,
2032 false, /* Prefer LDRD/STRD. */
2033 {true, true}, /* Prefer non short circuit. */
2034 &arm_default_vec_cost, /* Vectorizer costs. */
2035 false, /* Prefer Neon for 64-bits bitops. */
2036 false, false, /* Prefer 32-bit encodings. */
2037 false, /* Prefer Neon for stringops. */
2038 8, /* Maximum insns to inline memset. */
2039 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2040 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2041 2 /* Issue rate. */
2044 const struct tune_params arm_cortex_a12_tune =
2046 arm_9e_rtx_costs,
2047 &cortexa12_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 1, /* Constant limit. */
2050 2, /* Max cond insns. */
2051 ARM_PREFETCH_NOT_BENEFICIAL,
2052 false, /* Prefer constant pool. */
2053 arm_default_branch_cost,
2054 true, /* Prefer LDRD/STRD. */
2055 {true, true}, /* Prefer non short circuit. */
2056 &arm_default_vec_cost, /* Vectorizer costs. */
2057 false, /* Prefer Neon for 64-bits bitops. */
2058 true, true, /* Prefer 32-bit encodings. */
2059 true, /* Prefer Neon for stringops. */
2060 8, /* Maximum insns to inline memset. */
2061 ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */
2062 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2063 2 /* Issue rate. */
2066 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a
2067 single cycle to execute (so a MOVW/MOVT pair takes two cycles).  An LDR
2068 from the constant pool likewise takes two cycles to execute, but mildly
2069 increases pipelining opportunity (consecutive loads/stores can be
2070 pipelined together, saving one cycle), and may also improve icache
2071 utilisation.  Hence we prefer the constant pool for such processors. */
2073 const struct tune_params arm_v7m_tune =
2075 arm_9e_rtx_costs,
2076 &v7m_extra_costs,
2077 NULL, /* Sched adj cost. */
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 ARM_PREFETCH_NOT_BENEFICIAL,
2081 true, /* Prefer constant pool. */
2082 arm_cortex_m_branch_cost,
2083 false, /* Prefer LDRD/STRD. */
2084 {false, false}, /* Prefer non short circuit. */
2085 &arm_default_vec_cost, /* Vectorizer costs. */
2086 false, /* Prefer Neon for 64-bits bitops. */
2087 false, false, /* Prefer 32-bit encodings. */
2088 false, /* Prefer Neon for stringops. */
2089 8, /* Maximum insns to inline memset. */
2090 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2091 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2092 1 /* Issue rate. */
2095 /* Cortex-M7 tuning. */
2097 const struct tune_params arm_cortex_m7_tune =
2099 arm_9e_rtx_costs,
2100 &v7m_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 0, /* Constant limit. */
2103 1, /* Max cond insns. */
2104 ARM_PREFETCH_NOT_BENEFICIAL,
2105 true, /* Prefer constant pool. */
2106 arm_cortex_m7_branch_cost,
2107 false, /* Prefer LDRD/STRD. */
2108 {true, true}, /* Prefer non short circuit. */
2109 &arm_default_vec_cost, /* Vectorizer costs. */
2110 false, /* Prefer Neon for 64-bits bitops. */
2111 false, false, /* Prefer 32-bit encodings. */
2112 false, /* Prefer Neon for stringops. */
2113 8, /* Maximum insns to inline memset. */
2114 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2115 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2116 2 /* Issue rate. */
2119 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2120 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2121 const struct tune_params arm_v6m_tune =
2123 arm_9e_rtx_costs,
2124 NULL,
2125 NULL, /* Sched adj cost. */
2126 1, /* Constant limit. */
2127 5, /* Max cond insns. */
2128 ARM_PREFETCH_NOT_BENEFICIAL,
2129 false, /* Prefer constant pool. */
2130 arm_default_branch_cost,
2131 false, /* Prefer LDRD/STRD. */
2132 {false, false}, /* Prefer non short circuit. */
2133 &arm_default_vec_cost, /* Vectorizer costs. */
2134 false, /* Prefer Neon for 64-bits bitops. */
2135 false, false, /* Prefer 32-bit encodings. */
2136 false, /* Prefer Neon for stringops. */
2137 8, /* Maximum insns to inline memset. */
2138 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2139 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2140 1 /* Issue rate. */
2143 const struct tune_params arm_fa726te_tune =
2145 arm_9e_rtx_costs,
2146 NULL,
2147 fa726te_sched_adjust_cost,
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 ARM_PREFETCH_NOT_BENEFICIAL,
2151 true, /* Prefer constant pool. */
2152 arm_default_branch_cost,
2153 false, /* Prefer LDRD/STRD. */
2154 {true, true}, /* Prefer non short circuit. */
2155 &arm_default_vec_cost, /* Vectorizer costs. */
2156 false, /* Prefer Neon for 64-bits bitops. */
2157 false, false, /* Prefer 32-bit encodings. */
2158 false, /* Prefer Neon for stringops. */
2159 8, /* Maximum insns to inline memset. */
2160 ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */
2161 ARM_SCHED_AUTOPREF_OFF, /* Sched L2 autopref. */
2162 2 /* Issue rate. */
2166 /* Not all of these give usefully different compilation alternatives,
2167 but there is no simple way of generalizing them. */
2168 static const struct processors all_cores[] =
2170 /* ARM Cores */
2171 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2172 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2173 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2174 #include "arm-cores.def"
2175 #undef ARM_CORE
2176 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
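/* Illustrative only: a hypothetical entry such as
     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, flags, cortex_a8)
   in arm-cores.def would expand via the macro above to
     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A, flags | FL_FOR_ARCH7A,
      &arm_cortex_a8_tune},
   (the second, X, argument is unused by this expansion).  */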
2179 static const struct processors all_architectures[] =
2181 /* ARM Architectures */
2182 /* We don't specify tuning costs here as it will be figured out
2183 from the core. */
2185 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2186 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2187 #include "arm-arches.def"
2188 #undef ARM_ARCH
2189 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2193 /* These are populated as commandline arguments are processed, or NULL
2194 if not specified. */
2195 static const struct processors *arm_selected_arch;
2196 static const struct processors *arm_selected_cpu;
2197 static const struct processors *arm_selected_tune;
2199 /* The name of the preprocessor macro to define for this architecture. */
2201 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2203 /* Available values for -mfpu=. */
2205 static const struct arm_fpu_desc all_fpus[] =
2207 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2208 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2209 #include "arm-fpus.def"
2210 #undef ARM_FPU
2214 /* Supported TLS relocations. */
2216 enum tls_reloc {
2217 TLS_GD32,
2218 TLS_LDM32,
2219 TLS_LDO32,
2220 TLS_IE32,
2221 TLS_LE32,
2222 TLS_DESCSEQ /* GNU scheme */
2225 /* The maximum number of insns to be used when loading a constant. */
2226 inline static int
2227 arm_constant_limit (bool size_p)
2229 return size_p ? 1 : current_tune->constant_limit;
2232 /* Emit an insn that's a simple single-set. Both the operands must be known
2233 to be valid. */
2234 inline static rtx_insn *
2235 emit_set_insn (rtx x, rtx y)
2237 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2240 /* Return the number of bits set in VALUE. */
2241 static unsigned
2242 bit_count (unsigned long value)
2244 unsigned long count = 0;
2246 while (value)
2248 count++;
2249 value &= value - 1; /* Clear the least-significant set bit. */
2252 return count;
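/* For example, bit_count (0xf1) returns 5: each iteration clears one set
   bit (0xf1 -> 0xf0 -> 0xe0 -> 0xc0 -> 0x80 -> 0), so the loop runs once
   per set bit rather than once per bit position.  */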
2255 typedef struct
2257 machine_mode mode;
2258 const char *name;
2259 } arm_fixed_mode_set;
2261 /* A small helper for setting fixed-point libfuncs. */
2263 static void
2264 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2265 const char *funcname, const char *modename,
2266 int num_suffix)
2268 char buffer[50];
2270 if (num_suffix == 0)
2271 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2272 else
2273 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2275 set_optab_libfunc (optable, mode, buffer);
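/* For instance, arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers the libcall name "__gnu_addsq3"; a num_suffix of 0 omits the
   trailing digit.  (Illustrative example; the real calls are made from the
   loops in arm_init_libfuncs below.)  */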
2278 static void
2279 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2280 machine_mode from, const char *funcname,
2281 const char *toname, const char *fromname)
2283 char buffer[50];
2284 const char *maybe_suffix_2 = "";
2286 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2287 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2288 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2289 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2290 maybe_suffix_2 = "2";
2292 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2293 maybe_suffix_2);
2295 set_conv_libfunc (optable, to, from, buffer);
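/* For instance, a QQmode to SFmode conversion registers the libcall name
   "__gnu_fractqqsf", while QQmode to HQmode (both signed fractional modes)
   picks up the "2" suffix, giving "__gnu_fractqqhq2".  (Illustrative
   examples derived from the sprintf format above.)  */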
2298 /* Set up library functions unique to ARM. */
2300 static void
2301 arm_init_libfuncs (void)
2303 /* For Linux, we have access to kernel support for atomic operations. */
2304 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2305 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2307 /* There are no special library functions unless we are using the
2308 ARM BPABI. */
2309 if (!TARGET_BPABI)
2310 return;
2312 /* The functions below are described in Section 4 of the "Run-Time
2313 ABI for the ARM architecture", Version 1.0. */
2315 /* Double-precision floating-point arithmetic. Table 2. */
2316 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2317 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2318 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2319 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2320 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2322 /* Double-precision comparisons. Table 3. */
2323 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2324 set_optab_libfunc (ne_optab, DFmode, NULL);
2325 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2326 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2327 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2328 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2329 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2331 /* Single-precision floating-point arithmetic. Table 4. */
2332 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2333 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2334 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2335 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2336 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2338 /* Single-precision comparisons. Table 5. */
2339 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2340 set_optab_libfunc (ne_optab, SFmode, NULL);
2341 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2342 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2343 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2344 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2345 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2347 /* Floating-point to integer conversions. Table 6. */
2348 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2349 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2350 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2351 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2352 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2353 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2354 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2355 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2357 /* Conversions between floating types. Table 7. */
2358 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2359 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2361 /* Integer to floating-point conversions. Table 8. */
2362 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2363 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2364 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2365 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2366 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2367 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2368 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2369 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2371 /* Long long. Table 9. */
2372 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2373 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2374 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2375 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2376 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2377 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2378 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2379 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2381 /* Integer (32/32->32) division. \S 4.3.1. */
2382 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2383 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2385 /* The divmod functions are designed so that they can be used for
2386 plain division, even though they return both the quotient and the
2387 remainder. The quotient is returned in the usual location (i.e.,
2388 r0 for SImode, {r0, r1} for DImode), just as would be expected
2389 for an ordinary division routine. Because the AAPCS calling
2390 conventions specify that all of { r0, r1, r2, r3 } are
2391 call-clobbered registers, there is no need to tell the compiler
2392 explicitly that those registers are clobbered by these
2393 routines. */
2394 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2395 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
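/* Conceptually (an illustrative sketch, not a declaration GCC relies on),
   the AEABI divmod helpers behave like

     typedef struct { int quot; int rem; } idiv_return;
     idiv_return __aeabi_idivmod (int numerator, int denominator);

   so under the AAPCS the quotient comes back in r0 and the remainder in r1,
   which is why the same entry points can also serve plain division.  */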
2397 /* For SImode division the ABI provides div-without-mod routines,
2398 which are faster. */
2399 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2400 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2402 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2403 divmod libcalls instead. */
2404 set_optab_libfunc (smod_optab, DImode, NULL);
2405 set_optab_libfunc (umod_optab, DImode, NULL);
2406 set_optab_libfunc (smod_optab, SImode, NULL);
2407 set_optab_libfunc (umod_optab, SImode, NULL);
2409 /* Half-precision float operations. The compiler handles all operations
2410 with NULL libfuncs by converting to SFmode. */
2411 switch (arm_fp16_format)
2413 case ARM_FP16_FORMAT_IEEE:
2414 case ARM_FP16_FORMAT_ALTERNATIVE:
2416 /* Conversions. */
2417 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2418 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2419 ? "__gnu_f2h_ieee"
2420 : "__gnu_f2h_alternative"));
2421 set_conv_libfunc (sext_optab, SFmode, HFmode,
2422 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2423 ? "__gnu_h2f_ieee"
2424 : "__gnu_h2f_alternative"));
2426 /* Arithmetic. */
2427 set_optab_libfunc (add_optab, HFmode, NULL);
2428 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2429 set_optab_libfunc (smul_optab, HFmode, NULL);
2430 set_optab_libfunc (neg_optab, HFmode, NULL);
2431 set_optab_libfunc (sub_optab, HFmode, NULL);
2433 /* Comparisons. */
2434 set_optab_libfunc (eq_optab, HFmode, NULL);
2435 set_optab_libfunc (ne_optab, HFmode, NULL);
2436 set_optab_libfunc (lt_optab, HFmode, NULL);
2437 set_optab_libfunc (le_optab, HFmode, NULL);
2438 set_optab_libfunc (ge_optab, HFmode, NULL);
2439 set_optab_libfunc (gt_optab, HFmode, NULL);
2440 set_optab_libfunc (unord_optab, HFmode, NULL);
2441 break;
2443 default:
2444 break;
2447 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2449 const arm_fixed_mode_set fixed_arith_modes[] =
2451 { QQmode, "qq" },
2452 { UQQmode, "uqq" },
2453 { HQmode, "hq" },
2454 { UHQmode, "uhq" },
2455 { SQmode, "sq" },
2456 { USQmode, "usq" },
2457 { DQmode, "dq" },
2458 { UDQmode, "udq" },
2459 { TQmode, "tq" },
2460 { UTQmode, "utq" },
2461 { HAmode, "ha" },
2462 { UHAmode, "uha" },
2463 { SAmode, "sa" },
2464 { USAmode, "usa" },
2465 { DAmode, "da" },
2466 { UDAmode, "uda" },
2467 { TAmode, "ta" },
2468 { UTAmode, "uta" }
2470 const arm_fixed_mode_set fixed_conv_modes[] =
2472 { QQmode, "qq" },
2473 { UQQmode, "uqq" },
2474 { HQmode, "hq" },
2475 { UHQmode, "uhq" },
2476 { SQmode, "sq" },
2477 { USQmode, "usq" },
2478 { DQmode, "dq" },
2479 { UDQmode, "udq" },
2480 { TQmode, "tq" },
2481 { UTQmode, "utq" },
2482 { HAmode, "ha" },
2483 { UHAmode, "uha" },
2484 { SAmode, "sa" },
2485 { USAmode, "usa" },
2486 { DAmode, "da" },
2487 { UDAmode, "uda" },
2488 { TAmode, "ta" },
2489 { UTAmode, "uta" },
2490 { QImode, "qi" },
2491 { HImode, "hi" },
2492 { SImode, "si" },
2493 { DImode, "di" },
2494 { TImode, "ti" },
2495 { SFmode, "sf" },
2496 { DFmode, "df" }
2498 unsigned int i, j;
2500 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2502 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2503 "add", fixed_arith_modes[i].name, 3);
2504 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2505 "ssadd", fixed_arith_modes[i].name, 3);
2506 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2507 "usadd", fixed_arith_modes[i].name, 3);
2508 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2509 "sub", fixed_arith_modes[i].name, 3);
2510 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2511 "sssub", fixed_arith_modes[i].name, 3);
2512 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2513 "ussub", fixed_arith_modes[i].name, 3);
2514 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2515 "mul", fixed_arith_modes[i].name, 3);
2516 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2517 "ssmul", fixed_arith_modes[i].name, 3);
2518 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2519 "usmul", fixed_arith_modes[i].name, 3);
2520 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2521 "div", fixed_arith_modes[i].name, 3);
2522 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2523 "udiv", fixed_arith_modes[i].name, 3);
2524 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2525 "ssdiv", fixed_arith_modes[i].name, 3);
2526 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2527 "usdiv", fixed_arith_modes[i].name, 3);
2528 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2529 "neg", fixed_arith_modes[i].name, 2);
2530 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2531 "ssneg", fixed_arith_modes[i].name, 2);
2532 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2533 "usneg", fixed_arith_modes[i].name, 2);
2534 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2535 "ashl", fixed_arith_modes[i].name, 3);
2536 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2537 "ashr", fixed_arith_modes[i].name, 3);
2538 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2539 "lshr", fixed_arith_modes[i].name, 3);
2540 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2541 "ssashl", fixed_arith_modes[i].name, 3);
2542 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2543 "usashl", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2545 "cmp", fixed_arith_modes[i].name, 2);
2548 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2549 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2551 if (i == j
2552 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2553 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2554 continue;
2556 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2557 fixed_conv_modes[j].mode, "fract",
2558 fixed_conv_modes[i].name,
2559 fixed_conv_modes[j].name);
2560 arm_set_fixed_conv_libfunc (satfract_optab,
2561 fixed_conv_modes[i].mode,
2562 fixed_conv_modes[j].mode, "satfract",
2563 fixed_conv_modes[i].name,
2564 fixed_conv_modes[j].name);
2565 arm_set_fixed_conv_libfunc (fractuns_optab,
2566 fixed_conv_modes[i].mode,
2567 fixed_conv_modes[j].mode, "fractuns",
2568 fixed_conv_modes[i].name,
2569 fixed_conv_modes[j].name);
2570 arm_set_fixed_conv_libfunc (satfractuns_optab,
2571 fixed_conv_modes[i].mode,
2572 fixed_conv_modes[j].mode, "satfractuns",
2573 fixed_conv_modes[i].name,
2574 fixed_conv_modes[j].name);
2578 if (TARGET_AAPCS_BASED)
2579 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2582 /* On AAPCS systems, this is the "struct __va_list". */
2583 static GTY(()) tree va_list_type;
2585 /* Return the type to use as __builtin_va_list. */
2586 static tree
2587 arm_build_builtin_va_list (void)
2589 tree va_list_name;
2590 tree ap_field;
2592 if (!TARGET_AAPCS_BASED)
2593 return std_build_builtin_va_list ();
2595 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2596 defined as:
2598 struct __va_list
2600 void *__ap;
2603 The C Library ABI further reinforces this definition in \S
2604 4.1.
2606 We must follow this definition exactly. The structure tag
2607 name is visible in C++ mangled names, and thus forms a part
2608 of the ABI. The field name may be used by people who
2609 #include <stdarg.h>. */
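/* For example, under the ARM C++ ABI a va_list parameter is mangled as if
   it were std::__va_list ("St9__va_list"), so the tag name chosen here is
   directly ABI-visible.  (Illustrative note; see the ARM C++ ABI for the
   precise rule.)  */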
2610 /* Create the type. */
2611 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2612 /* Give it the required name. */
2613 va_list_name = build_decl (BUILTINS_LOCATION,
2614 TYPE_DECL,
2615 get_identifier ("__va_list"),
2616 va_list_type);
2617 DECL_ARTIFICIAL (va_list_name) = 1;
2618 TYPE_NAME (va_list_type) = va_list_name;
2619 TYPE_STUB_DECL (va_list_type) = va_list_name;
2620 /* Create the __ap field. */
2621 ap_field = build_decl (BUILTINS_LOCATION,
2622 FIELD_DECL,
2623 get_identifier ("__ap"),
2624 ptr_type_node);
2625 DECL_ARTIFICIAL (ap_field) = 1;
2626 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2627 TYPE_FIELDS (va_list_type) = ap_field;
2628 /* Compute its layout. */
2629 layout_type (va_list_type);
2631 return va_list_type;
2634 /* Return an expression of type "void *" pointing to the next
2635 available argument in a variable-argument list. VALIST is the
2636 user-level va_list object, of type __builtin_va_list. */
2637 static tree
2638 arm_extract_valist_ptr (tree valist)
2640 if (TREE_TYPE (valist) == error_mark_node)
2641 return error_mark_node;
2643 /* On an AAPCS target, the pointer is stored within "struct
2644 va_list". */
2645 if (TARGET_AAPCS_BASED)
2647 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2648 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2649 valist, ap_field, NULL_TREE);
2652 return valist;
2655 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2656 static void
2657 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2659 valist = arm_extract_valist_ptr (valist);
2660 std_expand_builtin_va_start (valist, nextarg);
2663 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2664 static tree
2665 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2666 gimple_seq *post_p)
2668 valist = arm_extract_valist_ptr (valist);
2669 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2672 /* Fix up any incompatible options that the user has specified. */
2673 static void
2674 arm_option_override (void)
2676 arm_selected_arch = NULL;
2677 arm_selected_cpu = NULL;
2678 arm_selected_tune = NULL;
2680 if (global_options_set.x_arm_arch_option)
2681 arm_selected_arch = &all_architectures[arm_arch_option];
2683 if (global_options_set.x_arm_cpu_option)
2685 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2686 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2689 if (global_options_set.x_arm_tune_option)
2690 arm_selected_tune = &all_cores[(int) arm_tune_option];
2692 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2693 SUBTARGET_OVERRIDE_OPTIONS;
2694 #endif
2696 if (arm_selected_arch)
2698 if (arm_selected_cpu)
2700 /* Check for conflict between mcpu and march. */
2701 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2703 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2704 arm_selected_cpu->name, arm_selected_arch->name);
2705 /* -march wins for code generation.
2706 -mcpu wins for default tuning. */
2707 if (!arm_selected_tune)
2708 arm_selected_tune = arm_selected_cpu;
2710 arm_selected_cpu = arm_selected_arch;
2712 else
2713 /* -mcpu wins. */
2714 arm_selected_arch = NULL;
2716 else
2717 /* Pick a CPU based on the architecture. */
2718 arm_selected_cpu = arm_selected_arch;
2721 /* If the user did not specify a processor, choose one for them. */
2722 if (!arm_selected_cpu)
2724 const struct processors * sel;
2725 unsigned int sought;
2727 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2728 if (!arm_selected_cpu->name)
2730 #ifdef SUBTARGET_CPU_DEFAULT
2731 /* Use the subtarget default CPU if none was specified by
2732 configure. */
2733 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2734 #endif
2735 /* Default to ARM6. */
2736 if (!arm_selected_cpu->name)
2737 arm_selected_cpu = &all_cores[arm6];
2740 sel = arm_selected_cpu;
2741 insn_flags = sel->flags;
2743 /* Now check to see if the user has specified some command line
2744 switches that require certain abilities from the cpu. */
2745 sought = 0;
2747 if (TARGET_INTERWORK || TARGET_THUMB)
2749 sought |= (FL_THUMB | FL_MODE32);
2751 /* There are no ARM processors that support both APCS-26 and
2752 interworking. Therefore we force FL_MODE26 to be removed
2753 from insn_flags here (if it was set), so that the search
2754 below will always be able to find a compatible processor. */
2755 insn_flags &= ~FL_MODE26;
2758 if (sought != 0 && ((sought & insn_flags) != sought))
2760 /* Try to locate a CPU type that supports all of the abilities
2761 of the default CPU, plus the extra abilities requested by
2762 the user. */
2763 for (sel = all_cores; sel->name != NULL; sel++)
2764 if ((sel->flags & sought) == (sought | insn_flags))
2765 break;
2767 if (sel->name == NULL)
2769 unsigned current_bit_count = 0;
2770 const struct processors * best_fit = NULL;
2772 /* Ideally we would like to issue an error message here
2773 saying that it was not possible to find a CPU compatible
2774 with the default CPU, but which also supports the command
2775 line options specified by the programmer, and so they
2776 ought to use the -mcpu=<name> command line option to
2777 override the default CPU type.
2779 If we cannot find a cpu that has both the
2780 characteristics of the default cpu and the given
2781 command line options we scan the array again looking
2782 for a best match. */
2783 for (sel = all_cores; sel->name != NULL; sel++)
2784 if ((sel->flags & sought) == sought)
2786 unsigned count;
2788 count = bit_count (sel->flags & insn_flags);
2790 if (count >= current_bit_count)
2792 best_fit = sel;
2793 current_bit_count = count;
2797 gcc_assert (best_fit);
2798 sel = best_fit;
2801 arm_selected_cpu = sel;
2805 gcc_assert (arm_selected_cpu);
2806 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2807 if (!arm_selected_tune)
2808 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2810 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2811 insn_flags = arm_selected_cpu->flags;
2812 arm_base_arch = arm_selected_cpu->base_arch;
2814 arm_tune = arm_selected_tune->core;
2815 tune_flags = arm_selected_tune->flags;
2816 current_tune = arm_selected_tune->tune;
2818 /* Make sure that the processor choice does not conflict with any of the
2819 other command line choices. */
2820 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2821 error ("target CPU does not support ARM mode");
2823 /* BPABI targets use linker tricks to allow interworking on cores
2824 without thumb support. */
2825 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2827 warning (0, "target CPU does not support interworking" );
2828 target_flags &= ~MASK_INTERWORK;
2831 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2833 warning (0, "target CPU does not support THUMB instructions");
2834 target_flags &= ~MASK_THUMB;
2837 if (TARGET_APCS_FRAME && TARGET_THUMB)
2839 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2840 target_flags &= ~MASK_APCS_FRAME;
2843 /* Callee super interworking implies thumb interworking. Adding
2844 this to the flags here simplifies the logic elsewhere. */
2845 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2846 target_flags |= MASK_INTERWORK;
2848 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2849 from here, where no function is currently being compiled. */
2850 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2851 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2853 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2854 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2856 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2858 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2859 target_flags |= MASK_APCS_FRAME;
2862 if (TARGET_POKE_FUNCTION_NAME)
2863 target_flags |= MASK_APCS_FRAME;
2865 if (TARGET_APCS_REENT && flag_pic)
2866 error ("-fpic and -mapcs-reent are incompatible");
2868 if (TARGET_APCS_REENT)
2869 warning (0, "APCS reentrant code not supported. Ignored");
2871 /* If this target is normally configured to use APCS frames, warn if they
2872 are turned off and debugging is turned on. */
2873 if (TARGET_ARM
2874 && write_symbols != NO_DEBUG
2875 && !TARGET_APCS_FRAME
2876 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2877 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879 if (TARGET_APCS_FLOAT)
2880 warning (0, "passing floating point arguments in fp regs not yet supported");
2882 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2883 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2884 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2885 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2886 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2887 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2888 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2889 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2890 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2891 arm_arch6m = arm_arch6 && !arm_arch_notm;
2892 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2893 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2894 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2895 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2896 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2898 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2899 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2900 thumb_code = TARGET_ARM == 0;
2901 thumb1_code = TARGET_THUMB1 != 0;
2902 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2903 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2904 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2905 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2906 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2907 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2908 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
2909 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2910 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2911 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2912 if (arm_restrict_it == 2)
2913 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2915 if (!TARGET_THUMB2)
2916 arm_restrict_it = 0;
2918 /* If we are not using the default (ARM mode) section anchor offset
2919 ranges, then set the correct ranges now. */
2920 if (TARGET_THUMB1)
2922 /* Thumb-1 LDR instructions cannot have negative offsets.
2923 Permissible positive offset ranges are 5-bit (for byte loads),
2924 6-bit (for halfword loads), or 7-bit (for word loads).
2925 Empirical results suggest a 7-bit anchor range gives the best
2926 overall code size. */
2927 targetm.min_anchor_offset = 0;
2928 targetm.max_anchor_offset = 127;
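/* The ranges above come from the common 5-bit immediate field: unscaled
   for byte loads (offsets 0-31), scaled by 2 for halfword loads (0-62) and
   by 4 for word loads (0-124); the 0..127 window used here corresponds to
   the 7-bit word-load case.  */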
2930 else if (TARGET_THUMB2)
2932 /* The minimum is set such that the total size of the block
2933 for a particular anchor is 248 + 1 + 4095 bytes, which is
2934 divisible by eight, ensuring natural spacing of anchors. */
2935 targetm.min_anchor_offset = -248;
2936 targetm.max_anchor_offset = 4095;
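/* That is, 248 + 1 + 4095 = 4344 = 8 * 543, so each anchored block remains
   a multiple of eight bytes in size.  */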
2939 /* V5 code we generate is completely interworking capable, so we turn off
2940 TARGET_INTERWORK here to avoid many tests later on. */
2942 /* XXX However, we must pass the right pre-processor defines to CPP
2943 or GLD can get confused. This is a hack. */
2944 if (TARGET_INTERWORK)
2945 arm_cpp_interwork = 1;
2947 if (arm_arch5)
2948 target_flags &= ~MASK_INTERWORK;
2950 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2951 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2953 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2954 error ("iwmmxt abi requires an iwmmxt capable cpu");
2956 if (!global_options_set.x_arm_fpu_index)
2958 const char *target_fpu_name;
2959 bool ok;
2961 #ifdef FPUTYPE_DEFAULT
2962 target_fpu_name = FPUTYPE_DEFAULT;
2963 #else
2964 target_fpu_name = "vfp";
2965 #endif
2967 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2968 CL_TARGET);
2969 gcc_assert (ok);
2972 arm_fpu_desc = &all_fpus[arm_fpu_index];
2974 switch (arm_fpu_desc->model)
2976 case ARM_FP_MODEL_VFP:
2977 arm_fpu_attr = FPU_VFP;
2978 break;
2980 default:
2981 gcc_unreachable();
2984 if (TARGET_AAPCS_BASED)
2986 if (TARGET_CALLER_INTERWORKING)
2987 error ("AAPCS does not support -mcaller-super-interworking");
2988 else
2989 if (TARGET_CALLEE_INTERWORKING)
2990 error ("AAPCS does not support -mcallee-super-interworking");
2993 /* iWMMXt and NEON are incompatible. */
2994 if (TARGET_IWMMXT && TARGET_NEON)
2995 error ("iWMMXt and NEON are incompatible");
2997 /* iWMMXt unsupported under Thumb mode. */
2998 if (TARGET_THUMB && TARGET_IWMMXT)
2999 error ("iWMMXt unsupported under Thumb mode");
3001 /* __fp16 support currently assumes the core has ldrh. */
3002 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3003 sorry ("__fp16 and no ldrh");
3005 /* If soft-float is specified then don't use FPU. */
3006 if (TARGET_SOFT_FLOAT)
3007 arm_fpu_attr = FPU_NONE;
3009 if (TARGET_AAPCS_BASED)
3011 if (arm_abi == ARM_ABI_IWMMXT)
3012 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3013 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3014 && TARGET_HARD_FLOAT
3015 && TARGET_VFP)
3016 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3017 else
3018 arm_pcs_default = ARM_PCS_AAPCS;
3020 else
3022 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3023 sorry ("-mfloat-abi=hard and VFP");
3025 if (arm_abi == ARM_ABI_APCS)
3026 arm_pcs_default = ARM_PCS_APCS;
3027 else
3028 arm_pcs_default = ARM_PCS_ATPCS;
3031 /* For arm2/3 there is no need to do any scheduling if we are doing
3032 software floating-point. */
3033 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3034 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3036 /* Use the cp15 method if it is available. */
3037 if (target_thread_pointer == TP_AUTO)
3039 if (arm_arch6k && !TARGET_THUMB1)
3040 target_thread_pointer = TP_CP15;
3041 else
3042 target_thread_pointer = TP_SOFT;
3045 if (TARGET_HARD_TP && TARGET_THUMB1)
3046 error ("can not use -mtp=cp15 with 16-bit Thumb");
3048 /* Override the default structure alignment for AAPCS ABI. */
3049 if (!global_options_set.x_arm_structure_size_boundary)
3051 if (TARGET_AAPCS_BASED)
3052 arm_structure_size_boundary = 8;
3054 else
3056 if (arm_structure_size_boundary != 8
3057 && arm_structure_size_boundary != 32
3058 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3060 if (ARM_DOUBLEWORD_ALIGN)
3061 warning (0,
3062 "structure size boundary can only be set to 8, 32 or 64");
3063 else
3064 warning (0, "structure size boundary can only be set to 8 or 32");
3065 arm_structure_size_boundary
3066 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3070 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
3072 error ("RTP PIC is incompatible with Thumb");
3073 flag_pic = 0;
3076 /* If stack checking is disabled, we can use r10 as the PIC register,
3077 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3078 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3080 if (TARGET_VXWORKS_RTP)
3081 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3082 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3085 if (flag_pic && TARGET_VXWORKS_RTP)
3086 arm_pic_register = 9;
3088 if (arm_pic_register_string != NULL)
3090 int pic_register = decode_reg_name (arm_pic_register_string);
3092 if (!flag_pic)
3093 warning (0, "-mpic-register= is useless without -fpic");
3095 /* Prevent the user from choosing an obviously stupid PIC register. */
3096 else if (pic_register < 0 || call_used_regs[pic_register]
3097 || pic_register == HARD_FRAME_POINTER_REGNUM
3098 || pic_register == STACK_POINTER_REGNUM
3099 || pic_register >= PC_REGNUM
3100 || (TARGET_VXWORKS_RTP
3101 && (unsigned int) pic_register != arm_pic_register))
3102 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3103 else
3104 arm_pic_register = pic_register;
3107 if (TARGET_VXWORKS_RTP
3108 && !global_options_set.x_arm_pic_data_is_text_relative)
3109 arm_pic_data_is_text_relative = 0;
3111 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3112 if (fix_cm3_ldrd == 2)
3114 if (arm_selected_cpu->core == cortexm3)
3115 fix_cm3_ldrd = 1;
3116 else
3117 fix_cm3_ldrd = 0;
3120 /* Enable -munaligned-access by default for
3121 - all ARMv6 architecture-based processors
3122 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3123 - ARMv8 architecture-based processors.
3125 Disable -munaligned-access by default for
3126 - all pre-ARMv6 architecture-based processors
3127 - ARMv6-M architecture-based processors. */
3129 if (unaligned_access == 2)
3131 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3132 unaligned_access = 1;
3133 else
3134 unaligned_access = 0;
3136 else if (unaligned_access == 1
3137 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3139 warning (0, "target CPU does not support unaligned accesses");
3140 unaligned_access = 0;
3143 if (TARGET_THUMB1 && flag_schedule_insns)
3145 /* Don't warn since it's on by default in -O2. */
3146 flag_schedule_insns = 0;
3149 if (optimize_size)
3151 /* If optimizing for size, bump the number of instructions that we
3152 are prepared to conditionally execute (even on a StrongARM). */
3153 max_insns_skipped = 6;
3155 /* For THUMB2, we limit the conditional sequence to one IT block. */
3156 if (TARGET_THUMB2)
3157 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3159 else
3160 max_insns_skipped = current_tune->max_insns_skipped;
3162 /* Hot/Cold partitioning is not currently supported, since we can't
3163 handle literal pool placement in that case. */
3164 if (flag_reorder_blocks_and_partition)
3166 inform (input_location,
3167 "-freorder-blocks-and-partition not supported on this architecture");
3168 flag_reorder_blocks_and_partition = 0;
3169 flag_reorder_blocks = 1;
3172 if (flag_pic)
3173 /* Hoisting PIC address calculations more aggressively provides a small,
3174 but measurable, size reduction for PIC code. Therefore, we decrease
3175 the bar for unrestricted expression hoisting to the cost of PIC address
3176 calculation, which is 2 instructions. */
3177 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3178 global_options.x_param_values,
3179 global_options_set.x_param_values);
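/* (For reference: a PIC address is typically materialised by a
   two-instruction sequence, e.g. a literal load of the GOT offset followed
   by an add of the PIC base register, hence the cost of 2 used above.)  */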
3181 /* ARM EABI defaults to strict volatile bitfields. */
3182 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3183 && abi_version_at_least(2))
3184 flag_strict_volatile_bitfields = 1;
3186 /* Enable sw prefetching at -O3 for CPUs that have prefetch and for which we
3187 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3188 if (flag_prefetch_loop_arrays < 0
3189 && HAVE_prefetch
3190 && optimize >= 3
3191 && current_tune->num_prefetch_slots > 0)
3192 flag_prefetch_loop_arrays = 1;
3194 /* Set up parameters to be used in prefetching algorithm. Do not override the
3195 defaults unless we are tuning for a core we have researched values for. */
3196 if (current_tune->num_prefetch_slots > 0)
3197 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3198 current_tune->num_prefetch_slots,
3199 global_options.x_param_values,
3200 global_options_set.x_param_values);
3201 if (current_tune->l1_cache_line_size >= 0)
3202 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3203 current_tune->l1_cache_line_size,
3204 global_options.x_param_values,
3205 global_options_set.x_param_values);
3206 if (current_tune->l1_cache_size >= 0)
3207 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3208 current_tune->l1_cache_size,
3209 global_options.x_param_values,
3210 global_options_set.x_param_values);
3212 /* Use Neon rather than core registers to perform 64-bit
3213 operations. */
3214 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3215 if (use_neon_for_64bits == 1)
3216 prefer_neon_for_64bits = true;
3218 /* Use the alternative scheduling-pressure algorithm by default. */
3219 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3220 global_options.x_param_values,
3221 global_options_set.x_param_values);
3223 /* Look through the ready list and all of the queue for instructions
3224 relevant to the L2 auto-prefetcher. */
3225 int param_sched_autopref_queue_depth;
3226 if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF)
3227 param_sched_autopref_queue_depth = -1;
3228 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK)
3229 param_sched_autopref_queue_depth = 0;
3230 else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL)
3231 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3232 else
3233 gcc_unreachable ();
3234 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3235 param_sched_autopref_queue_depth,
3236 global_options.x_param_values,
3237 global_options_set.x_param_values);
3239 /* Disable shrink-wrap when optimizing function for size, since it tends to
3240 generate additional returns. */
3241 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3242 flag_shrink_wrap = false;
3243 /* TBD: Dwarf info for apcs frame is not handled yet. */
3244 if (TARGET_APCS_FRAME)
3245 flag_shrink_wrap = false;
3247 /* We only support -mslow-flash-data on armv7-m targets. */
3248 if (target_slow_flash_data
3249 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3250 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3251 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3253 /* Currently, for slow flash data, we just disable literal pools. */
3254 if (target_slow_flash_data)
3255 arm_disable_literal_pool = true;
3257 /* Thumb2 inline assembly code should always use unified syntax.
3258 This will apply to ARM and Thumb1 eventually. */
3259 if (TARGET_THUMB2)
3260 inline_asm_unified = 1;
3262 /* Disable scheduling fusion by default if the target is not an armv7
3263 processor or does not prefer ldrd/strd. */
3264 if (flag_schedule_fusion == 2
3265 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3266 flag_schedule_fusion = 0;
3268 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3269 - epilogue_insns - does not accurately model the corresponding insns
3270 emitted in the asm file. In particular, see the comment in thumb_exit
3271 'Find out how many of the (return) argument registers we can corrupt'.
3272 As a consequence, the epilogue may clobber registers without fipa-ra
3273 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3274 TODO: Accurately model clobbers for epilogue_insns and reenable
3275 fipa-ra. */
3276 if (TARGET_THUMB1)
3277 flag_ipa_ra = 0;
3279 /* Register global variables with the garbage collector. */
3280 arm_add_gc_roots ();
3283 static void
3284 arm_add_gc_roots (void)
3286 gcc_obstack_init(&minipool_obstack);
3287 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3290 /* A table of known ARM exception types.
3291 For use with the interrupt function attribute. */
3293 typedef struct
3295 const char *const arg;
3296 const unsigned long return_value;
3298 isr_attribute_arg;
3300 static const isr_attribute_arg isr_attribute_args [] =
3302 { "IRQ", ARM_FT_ISR },
3303 { "irq", ARM_FT_ISR },
3304 { "FIQ", ARM_FT_FIQ },
3305 { "fiq", ARM_FT_FIQ },
3306 { "ABORT", ARM_FT_ISR },
3307 { "abort", ARM_FT_ISR },
3310 { "UNDEF", ARM_FT_EXCEPTION },
3311 { "undef", ARM_FT_EXCEPTION },
3312 { "SWI", ARM_FT_EXCEPTION },
3313 { "swi", ARM_FT_EXCEPTION },
3314 { NULL, ARM_FT_NORMAL }
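/* For example, a handler declared as
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   matches the first entry above and is compiled with type ARM_FT_ISR.  */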
3317 /* Returns the (interrupt) function type of the current
3318 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3320 static unsigned long
3321 arm_isr_value (tree argument)
3323 const isr_attribute_arg * ptr;
3324 const char * arg;
3326 if (!arm_arch_notm)
3327 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3329 /* No argument - default to IRQ. */
3330 if (argument == NULL_TREE)
3331 return ARM_FT_ISR;
3333 /* Get the value of the argument. */
3334 if (TREE_VALUE (argument) == NULL_TREE
3335 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3336 return ARM_FT_UNKNOWN;
3338 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3340 /* Check it against the list of known arguments. */
3341 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3342 if (streq (arg, ptr->arg))
3343 return ptr->return_value;
3345 /* An unrecognized interrupt type. */
3346 return ARM_FT_UNKNOWN;
3349 /* Computes the type of the current function. */
3351 static unsigned long
3352 arm_compute_func_type (void)
3354 unsigned long type = ARM_FT_UNKNOWN;
3355 tree a;
3356 tree attr;
3358 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3360 /* Decide if the current function is volatile. Such functions
3361 never return, and many memory cycles can be saved by not storing
3362 register values that will never be needed again. This optimization
3363 was added to speed up context switching in a kernel application. */
3364 if (optimize > 0
3365 && (TREE_NOTHROW (current_function_decl)
3366 || !(flag_unwind_tables
3367 || (flag_exceptions
3368 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3369 && TREE_THIS_VOLATILE (current_function_decl))
3370 type |= ARM_FT_VOLATILE;
3372 if (cfun->static_chain_decl != NULL)
3373 type |= ARM_FT_NESTED;
3375 attr = DECL_ATTRIBUTES (current_function_decl);
3377 a = lookup_attribute ("naked", attr);
3378 if (a != NULL_TREE)
3379 type |= ARM_FT_NAKED;
3381 a = lookup_attribute ("isr", attr);
3382 if (a == NULL_TREE)
3383 a = lookup_attribute ("interrupt", attr);
3385 if (a == NULL_TREE)
3386 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3387 else
3388 type |= arm_isr_value (TREE_VALUE (a));
3390 return type;
3393 /* Returns the type of the current function. */
3395 unsigned long
3396 arm_current_func_type (void)
3398 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3399 cfun->machine->func_type = arm_compute_func_type ();
3401 return cfun->machine->func_type;
3404 bool
3405 arm_allocate_stack_slots_for_args (void)
3407 /* Naked functions should not allocate stack slots for arguments. */
3408 return !IS_NAKED (arm_current_func_type ());
3411 static bool
3412 arm_warn_func_return (tree decl)
3414 /* Naked functions are implemented entirely in assembly, including the
3415 return sequence, so suppress warnings about this. */
3416 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3420 /* Output assembler code for a block containing the constant parts
3421 of a trampoline, leaving space for the variable parts.
3423 On the ARM, (if r8 is the static chain regnum, and remembering that
3424 referencing pc adds an offset of 8) the trampoline looks like:
3425 ldr r8, [pc, #0]
3426 ldr pc, [pc]
3427 .word static chain value
3428 .word function's address
3429 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3431 static void
3432 arm_asm_trampoline_template (FILE *f)
3434 if (TARGET_ARM)
3436 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3437 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3439 else if (TARGET_THUMB2)
3441 /* The Thumb-2 trampoline is similar to the arm implementation.
3442 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3443 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3444 STATIC_CHAIN_REGNUM, PC_REGNUM);
3445 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3447 else
3449 ASM_OUTPUT_ALIGN (f, 2);
3450 fprintf (f, "\t.code\t16\n");
3451 fprintf (f, ".Ltrampoline_start:\n");
3452 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3453 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3454 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3455 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3456 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3457 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3459 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3460 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3463 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3465 static void
3466 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3468 rtx fnaddr, mem, a_tramp;
3470 emit_block_move (m_tramp, assemble_trampoline_template (),
3471 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
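/* The two .word slots of the template copied above follow 8 bytes of code
   in the ARM and Thumb-2 variants and 12 bytes of code in the Thumb-1
   stub, which is where the offsets used below come from.  */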
3473 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3474 emit_move_insn (mem, chain_value);
3476 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3477 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3478 emit_move_insn (mem, fnaddr);
3480 a_tramp = XEXP (m_tramp, 0);
3481 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3482 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3483 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3486 /* Thumb trampolines should be entered in thumb mode, so set
3487 the bottom bit of the address. */
3489 static rtx
3490 arm_trampoline_adjust_address (rtx addr)
3492 if (TARGET_THUMB)
3493 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3494 NULL, 0, OPTAB_LIB_WIDEN);
3495 return addr;
3498 /* Return 1 if it is possible to return using a single instruction.
3499 If SIBLING is non-null, this is a test for a return before a sibling
3500 call. SIBLING is the call insn, so we can examine its register usage. */
3503 use_return_insn (int iscond, rtx sibling)
3505 int regno;
3506 unsigned int func_type;
3507 unsigned long saved_int_regs;
3508 unsigned HOST_WIDE_INT stack_adjust;
3509 arm_stack_offsets *offsets;
3511 /* Never use a return instruction before reload has run. */
3512 if (!reload_completed)
3513 return 0;
3515 func_type = arm_current_func_type ();
3517 /* Naked, volatile and stack alignment functions need special
3518 consideration. */
3519 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3520 return 0;
3522 /* So do interrupt functions that use the frame pointer and Thumb
3523 interrupt functions. */
3524 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3525 return 0;
3527 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3528 && !optimize_function_for_size_p (cfun))
3529 return 0;
3531 offsets = arm_get_frame_offsets ();
3532 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3534 /* As do variadic functions. */
3535 if (crtl->args.pretend_args_size
3536 || cfun->machine->uses_anonymous_args
3537 /* Or if the function calls __builtin_eh_return () */
3538 || crtl->calls_eh_return
3539 /* Or if the function calls alloca */
3540 || cfun->calls_alloca
3541 /* Or if there is a stack adjustment. However, if the stack pointer
3542 is saved on the stack, we can use a pre-incrementing stack load. */
3543 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3544 && stack_adjust == 4)))
3545 return 0;
3547 saved_int_regs = offsets->saved_regs_mask;
3549 /* Unfortunately, the insn
3551 ldmib sp, {..., sp, ...}
3553 triggers a bug on most SA-110 based devices, such that the stack
3554 pointer won't be correctly restored if the instruction takes a
3555 page fault. We work around this problem by popping r3 along with
3556 the other registers, since that is never slower than executing
3557 another instruction.
3559 We test for !arm_arch5 here, because code for any architecture
3560 less than this could potentially be run on one of the buggy
3561 chips. */
3562 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3564 /* Validate that r3 is a call-clobbered register (always true in
3565 the default abi) ... */
3566 if (!call_used_regs[3])
3567 return 0;
3569 /* ... that it isn't being used for a return value ... */
3570 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3571 return 0;
3573 /* ... or for a tail-call argument ... */
3574 if (sibling)
3576 gcc_assert (CALL_P (sibling));
3578 if (find_regno_fusage (sibling, USE, 3))
3579 return 0;
3582 /* ... and that there are no call-saved registers in r0-r2
3583 (always true in the default ABI). */
3584 if (saved_int_regs & 0x7)
3585 return 0;
3588 /* Can't be done if interworking with Thumb, and any registers have been
3589 stacked. */
3590 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3591 return 0;
3593 /* On StrongARM, conditional returns are expensive if they aren't
3594 taken and multiple registers have been stacked. */
3595 if (iscond && arm_tune_strongarm)
3597 /* Conditional return when just the LR is stored is a simple
3598 conditional-load instruction, so that's not expensive. */
3599 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3600 return 0;
3602 if (flag_pic
3603 && arm_pic_register != INVALID_REGNUM
3604 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3605 return 0;
3608 /* If there are saved registers but the LR isn't saved, then we need
3609 two instructions for the return. */
3610 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3611 return 0;
3613 /* Can't be done if any of the VFP regs are pushed,
3614 since this also requires an insn. */
3615 if (TARGET_HARD_FLOAT && TARGET_VFP)
3616 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3617 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3618 return 0;
3620 if (TARGET_REALLY_IWMMXT)
3621 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3622 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3623 return 0;
3625 return 1;
3628 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3629 shrink-wrapping if possible. This is the case if we need to emit a
3630 prologue, which we can test by looking at the offsets. */
3631 bool
3632 use_simple_return_p (void)
3634 arm_stack_offsets *offsets;
3636 offsets = arm_get_frame_offsets ();
3637 return offsets->outgoing_args != 0;
3640 /* Return TRUE if int I is a valid immediate ARM constant. */
3643 const_ok_for_arm (HOST_WIDE_INT i)
3645 int lowbit;
3647 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3648 be all zero, or all one. */
3649 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3650 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3651 != ((~(unsigned HOST_WIDE_INT) 0)
3652 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3653 return FALSE;
3655 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3657 /* Fast return for 0 and small values. We must do this for zero, since
3658 the code below can't handle that one case. */
3659 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3660 return TRUE;
3662 /* Get the number of trailing zeros. */
3663 lowbit = ffs((int) i) - 1;
3665 /* Only even shifts are allowed in ARM mode so round down to the
3666 nearest even number. */
3667 if (TARGET_ARM)
3668 lowbit &= ~1;
3670 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3671 return TRUE;
3673 if (TARGET_ARM)
3675 /* Allow rotated constants in ARM mode. */
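/* An ARM immediate is an 8-bit value rotated right by an even amount.
   The contiguous test above misses patterns that wrap around bit 31;
   the three masks below accept exactly those cases, corresponding to
   rotations right by 2, 4 and 6 bits (e.g. 0xc000003f covers bits 0-5
   together with bits 30-31).  */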
3676 if (lowbit <= 4
3677 && ((i & ~0xc000003f) == 0
3678 || (i & ~0xf000000f) == 0
3679 || (i & ~0xfc000003) == 0))
3680 return TRUE;
3682 else
3684 HOST_WIDE_INT v;
3686 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3687 v = i & 0xff;
3688 v |= v << 16;
3689 if (i == v || i == (v | (v << 8)))
3690 return TRUE;
3692 /* Allow repeated pattern 0xXY00XY00. */
3693 v = i & 0xff00;
3694 v |= v << 16;
3695 if (i == v)
3696 return TRUE;
3699 return FALSE;
3702 /* Return true if I is a valid constant for the operation CODE. */
3704 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3706 if (const_ok_for_arm (i))
3707 return 1;
3709 switch (code)
3711 case SET:
3712 /* See if we can use movw. */
3713 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3714 return 1;
3715 else
3716 /* Otherwise, try mvn. */
3717 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3719 case PLUS:
3720 /* See if we can use addw or subw. */
3721 if (TARGET_THUMB2
3722 && ((i & 0xfffff000) == 0
3723 || ((-i) & 0xfffff000) == 0))
3724 return 1;
3725 /* else fall through. */
3727 case COMPARE:
3728 case EQ:
3729 case NE:
3730 case GT:
3731 case LE:
3732 case LT:
3733 case GE:
3734 case GEU:
3735 case LTU:
3736 case GTU:
3737 case LEU:
3738 case UNORDERED:
3739 case ORDERED:
3740 case UNEQ:
3741 case UNGE:
3742 case UNLT:
3743 case UNGT:
3744 case UNLE:
3745 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3747 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3748 case XOR:
3749 return 0;
3751 case IOR:
3752 if (TARGET_THUMB2)
3753 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3754 return 0;
3756 case AND:
3757 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3759 default:
3760 gcc_unreachable ();
3764 /* Return true if I is a valid di mode constant for the operation CODE. */
3766 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3768 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3769 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3770 rtx hi = GEN_INT (hi_val);
3771 rtx lo = GEN_INT (lo_val);
3773 if (TARGET_THUMB1)
3774 return 0;
3776 switch (code)
3778 case AND:
3779 case IOR:
3780 case XOR:
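/* The two 32-bit halves are operated on separately, so a half that is
   all ones is also usable: it is a no-op for AND, and reduces to a move
   of -1 for IOR and to an MVN for XOR.  */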
3781 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3782 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3783 case PLUS:
3784 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3786 default:
3787 return 0;
3791 /* Emit a sequence of insns to handle a large constant.
3792 CODE is the code of the operation required, it can be any of SET, PLUS,
3793 IOR, AND, XOR, MINUS;
3794 MODE is the mode in which the operation is being performed;
3795 VAL is the integer to operate on;
3796 SOURCE is the other operand (a register, or a null-pointer for SET);
3797 SUBTARGETS means it is safe to create scratch registers if that will
3798 either produce a simpler sequence, or we will want to cse the values.
3799 Return value is the number of insns emitted. */
3801 /* ??? Tweak this for thumb2. */
3803 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3804 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3806 rtx cond;
3808 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3809 cond = COND_EXEC_TEST (PATTERN (insn));
3810 else
3811 cond = NULL_RTX;
3813 if (subtargets || code == SET
3814 || (REG_P (target) && REG_P (source)
3815 && REGNO (target) != REGNO (source)))
3817 /* After arm_reorg has been called, we can't fix up expensive
3818 constants by pushing them into memory so we must synthesize
3819 them in-line, regardless of the cost. This is only likely to
3820 be more costly on chips that have load delay slots and we are
3821 compiling without running the scheduler (so no splitting
3822 occurred before the final instruction emission).
3824 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3826 if (!cfun->machine->after_arm_reorg
3827 && !cond
3828 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3829 1, 0)
3830 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3831 + (code != SET))))
3833 if (code == SET)
3835 /* Currently SET is the only monadic value for CODE, all
3836 the rest are dyadic. */
3837 if (TARGET_USE_MOVT)
3838 arm_emit_movpair (target, GEN_INT (val));
3839 else
3840 emit_set_insn (target, GEN_INT (val));
3842 return 1;
3844 else
3846 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3848 if (TARGET_USE_MOVT)
3849 arm_emit_movpair (temp, GEN_INT (val));
3850 else
3851 emit_set_insn (temp, GEN_INT (val));
3853 /* For MINUS, the value is subtracted from, since we never
3854 have subtraction of a constant. */
3855 if (code == MINUS)
3856 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3857 else
3858 emit_set_insn (target,
3859 gen_rtx_fmt_ee (code, mode, source, temp));
3860 return 2;
3865 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3869 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3870 ARM/THUMB2 immediates and add up to VAL.
3871 The function return value gives the number of insns required. */
3872 static int
3873 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3874 struct four_ints *return_sequence)
3876 int best_consecutive_zeros = 0;
3877 int i;
3878 int best_start = 0;
3879 int insns1, insns2;
3880 struct four_ints tmp_sequence;
3882 /* If we aren't targeting ARM, the best place to start is always at
3883 the bottom, otherwise look more closely. */
3884 if (TARGET_ARM)
3886 for (i = 0; i < 32; i += 2)
3888 int consecutive_zeros = 0;
3890 if (!(val & (3 << i)))
3892 while ((i < 32) && !(val & (3 << i)))
3894 consecutive_zeros += 2;
3895 i += 2;
3897 if (consecutive_zeros > best_consecutive_zeros)
3899 best_consecutive_zeros = consecutive_zeros;
3900 best_start = i - consecutive_zeros;
3902 i -= 2;
3907 /* So long as it won't require any more insns to do so, it's
3908 desirable to emit a small constant (in bits 0...9) in the last
3909 insn. This way there is more chance that it can be combined with
3910 a later addressing insn to form a pre-indexed load or store
3911 operation. Consider:
3913 *((volatile int *)0xe0000100) = 1;
3914 *((volatile int *)0xe0000110) = 2;
3916 We want this to wind up as:
3918 mov rA, #0xe0000000
3919 mov rB, #1
3920 str rB, [rA, #0x100]
3921 mov rB, #2
3922 str rB, [rA, #0x110]
3924 rather than having to synthesize both large constants from scratch.
3926 Therefore, we calculate how many insns would be required to emit
3927 the constant starting from `best_start', and also starting from
3928 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3929 yield a shorter sequence, we may as well use zero. */
3930 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3931 if (best_start != 0
3932 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3934 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3935 if (insns2 <= insns1)
3937 *return_sequence = tmp_sequence;
3938 insns1 = insns2;
3942 return insns1;
3945 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3946 static int
3947 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3948 struct four_ints *return_sequence, int i)
3950 int remainder = val & 0xffffffff;
3951 int insns = 0;
3953 /* Try and find a way of doing the job in either two or three
3954 instructions.
3956 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3957 location. We start at position I. This may be the MSB, or
3958 optimal_immediate_sequence may have positioned it at the largest block
3959 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3960 wrapping around to the top of the word when we drop off the bottom.
3961 In the worst case this code should produce no more than four insns.
3963 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3964 constants, shifted to any arbitrary location. We should always start
3965 at the MSB. */
3968 int end;
3969 unsigned int b1, b2, b3, b4;
3970 unsigned HOST_WIDE_INT result;
3971 int loc;
3973 gcc_assert (insns < 4);
3975 if (i <= 0)
3976 i += 32;
3978 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
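/* The test below checks the bit(s) just below position I: a pair of bits
   in ARM mode, since ARM rotations are by multiples of two, and a single
   bit in Thumb-2 mode.  */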
3979 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3981 loc = i;
3982 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3983 /* We can use addw/subw for the last 12 bits. */
3984 result = remainder;
3985 else
3987 /* Use an 8-bit shifted/rotated immediate. */
3988 end = i - 8;
3989 if (end < 0)
3990 end += 32;
3991 result = remainder & ((0x0ff << end)
3992 | ((i < end) ? (0xff >> (32 - end))
3993 : 0));
3994 i -= 8;
3997 else
3999 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4000 arbitrary shifts. */
4001 i -= TARGET_ARM ? 2 : 1;
4002 continue;
4005 /* Next, see if we can do a better job with a thumb2 replicated
4006 constant.
4008 We do it this way around to catch the cases like 0x01F001E0 where
4009 two 8-bit immediates would work, but a replicated constant would
4010 make it worse.
4012 TODO: 16-bit constants that don't clear all the bits, but still win.
4013 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4014 if (TARGET_THUMB2)
4016 b1 = (remainder & 0xff000000) >> 24;
4017 b2 = (remainder & 0x00ff0000) >> 16;
4018 b3 = (remainder & 0x0000ff00) >> 8;
4019 b4 = remainder & 0xff;
4021 if (loc > 24)
4023 /* The 8-bit immediate already found clears b1 (and maybe b2),
4024 but must leave b3 and b4 alone. */
4026 /* First try to find a 32-bit replicated constant that clears
4027 almost everything. We can assume that we can't do it in one,
4028 or else we wouldn't be here. */
4029 unsigned int tmp = b1 & b2 & b3 & b4;
4030 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4031 + (tmp << 24);
4032 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4033 + (tmp == b3) + (tmp == b4);
4034 if (tmp
4035 && (matching_bytes >= 3
4036 || (matching_bytes == 2
4037 && const_ok_for_op (remainder & ~tmp2, code))))
4039 /* At least 3 of the bytes match, and the fourth has at
4040 least as many bits set, or two of the bytes match
4041 and it will only require one more insn to finish. */
4042 result = tmp2;
4043 i = tmp != b1 ? 32
4044 : tmp != b2 ? 24
4045 : tmp != b3 ? 16
4046 : 8;
4049 /* Second, try to find a 16-bit replicated constant that can
4050 leave three of the bytes clear. If b2 or b4 is already
4051 zero, then we can. If the 8-bit from above would not
4052 clear b2 anyway, then we still win. */
4053 else if (b1 == b3 && (!b2 || !b4
4054 || (remainder & 0x00ff0000 & ~result)))
4056 result = remainder & 0xff00ff00;
4057 i = 24;
4060 else if (loc > 16)
4062 /* The 8-bit immediate already found clears b2 (and maybe b3)
4063 and we don't get here unless b1 is already clear, but it will
4064 leave b4 unchanged. */
4066 /* If we can clear b2 and b4 at once, then we win, since the
4067 8-bits couldn't possibly reach that far. */
4068 if (b2 == b4)
4070 result = remainder & 0x00ff00ff;
4071 i = 16;
4076 return_sequence->i[insns++] = result;
4077 remainder &= ~result;
4079 if (code == SET || code == MINUS)
4080 code = PLUS;
4082 while (remainder);
4084 return insns;
4087 /* Emit an instruction with the indicated PATTERN. If COND is
4088 non-NULL, conditionalize the execution of the instruction on COND
4089 being true. */
4091 static void
4092 emit_constant_insn (rtx cond, rtx pattern)
4094 if (cond)
4095 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4096 emit_insn (pattern);
4099 /* As above, but extra parameter GENERATE which, if clear, suppresses
4100 RTL generation. */
4102 static int
4103 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4104 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4105 int generate)
4107 int can_invert = 0;
4108 int can_negate = 0;
4109 int final_invert = 0;
4110 int i;
4111 int set_sign_bit_copies = 0;
4112 int clear_sign_bit_copies = 0;
4113 int clear_zero_bit_copies = 0;
4114 int set_zero_bit_copies = 0;
4115 int insns = 0, neg_insns, inv_insns;
4116 unsigned HOST_WIDE_INT temp1, temp2;
4117 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4118 struct four_ints *immediates;
4119 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4121 /* Find out which operations are safe for a given CODE. Also do a quick
4122 check for degenerate cases; these can occur when DImode operations
4123 are split. */
4124 switch (code)
4126 case SET:
4127 can_invert = 1;
4128 break;
4130 case PLUS:
4131 can_negate = 1;
4132 break;
4134 case IOR:
4135 if (remainder == 0xffffffff)
4137 if (generate)
4138 emit_constant_insn (cond,
4139 gen_rtx_SET (VOIDmode, target,
4140 GEN_INT (ARM_SIGN_EXTEND (val))));
4141 return 1;
4144 if (remainder == 0)
4146 if (reload_completed && rtx_equal_p (target, source))
4147 return 0;
4149 if (generate)
4150 emit_constant_insn (cond,
4151 gen_rtx_SET (VOIDmode, target, source));
4152 return 1;
4154 break;
4156 case AND:
4157 if (remainder == 0)
4159 if (generate)
4160 emit_constant_insn (cond,
4161 gen_rtx_SET (VOIDmode, target, const0_rtx));
4162 return 1;
4164 if (remainder == 0xffffffff)
4166 if (reload_completed && rtx_equal_p (target, source))
4167 return 0;
4168 if (generate)
4169 emit_constant_insn (cond,
4170 gen_rtx_SET (VOIDmode, target, source));
4171 return 1;
4173 can_invert = 1;
4174 break;
4176 case XOR:
4177 if (remainder == 0)
4179 if (reload_completed && rtx_equal_p (target, source))
4180 return 0;
4181 if (generate)
4182 emit_constant_insn (cond,
4183 gen_rtx_SET (VOIDmode, target, source));
4184 return 1;
4187 if (remainder == 0xffffffff)
4189 if (generate)
4190 emit_constant_insn (cond,
4191 gen_rtx_SET (VOIDmode, target,
4192 gen_rtx_NOT (mode, source)));
4193 return 1;
4195 final_invert = 1;
4196 break;
4198 case MINUS:
4199 /* We treat MINUS as (val - source), since (source - val) is always
4200 passed as (source + (-val)). */
4201 if (remainder == 0)
4203 if (generate)
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (VOIDmode, target,
4206 gen_rtx_NEG (mode, source)));
4207 return 1;
4209 if (const_ok_for_arm (val))
4211 if (generate)
4212 emit_constant_insn (cond,
4213 gen_rtx_SET (VOIDmode, target,
4214 gen_rtx_MINUS (mode, GEN_INT (val),
4215 source)));
4216 return 1;
4219 break;
4221 default:
4222 gcc_unreachable ();
4225 /* If we can do it in one insn get out quickly. */
4226 if (const_ok_for_op (val, code))
4228 if (generate)
4229 emit_constant_insn (cond,
4230 gen_rtx_SET (VOIDmode, target,
4231 (source
4232 ? gen_rtx_fmt_ee (code, mode, source,
4233 GEN_INT (val))
4234 : GEN_INT (val))));
4235 return 1;
4238 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4239 insn. */
4240 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4241 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4243 if (generate)
4245 if (mode == SImode && i == 16)
4246 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4247 smaller insn. */
4248 emit_constant_insn (cond,
4249 gen_zero_extendhisi2
4250 (target, gen_lowpart (HImode, source)));
4251 else
4252 /* Extz only supports SImode, but we can coerce the operands
4253 into that mode. */
4254 emit_constant_insn (cond,
4255 gen_extzv_t2 (gen_lowpart (SImode, target),
4256 gen_lowpart (SImode, source),
4257 GEN_INT (i), const0_rtx));
4260 return 1;
4263 /* Calculate a few attributes that may be useful for specific
4264 optimizations. */
4265 /* Count number of leading zeros. */
4266 for (i = 31; i >= 0; i--)
4268 if ((remainder & (1 << i)) == 0)
4269 clear_sign_bit_copies++;
4270 else
4271 break;
4274 /* Count number of leading 1's. */
4275 for (i = 31; i >= 0; i--)
4277 if ((remainder & (1 << i)) != 0)
4278 set_sign_bit_copies++;
4279 else
4280 break;
4283 /* Count number of trailing zero's. */
4284 for (i = 0; i <= 31; i++)
4286 if ((remainder & (1 << i)) == 0)
4287 clear_zero_bit_copies++;
4288 else
4289 break;
4292 /* Count number of trailing 1's. */
4293 for (i = 0; i <= 31; i++)
4295 if ((remainder & (1 << i)) != 0)
4296 set_zero_bit_copies++;
4297 else
4298 break;
4301 switch (code)
4303 case SET:
4304 /* See if we can do this by sign_extending a constant that is known
4305 to be negative. This is a good way of doing it, since the shift
4306 may well merge into a subsequent insn. */
4307 if (set_sign_bit_copies > 1)
4309 if (const_ok_for_arm
4310 (temp1 = ARM_SIGN_EXTEND (remainder
4311 << (set_sign_bit_copies - 1))))
4313 if (generate)
4315 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4316 emit_constant_insn (cond,
4317 gen_rtx_SET (VOIDmode, new_src,
4318 GEN_INT (temp1)));
4319 emit_constant_insn (cond,
4320 gen_ashrsi3 (target, new_src,
4321 GEN_INT (set_sign_bit_copies - 1)));
4323 return 2;
4325 /* For an inverted constant, we will need to set the low bits,
4326 these will be shifted out of harm's way. */
4327 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4328 if (const_ok_for_arm (~temp1))
4330 if (generate)
4332 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4333 emit_constant_insn (cond,
4334 gen_rtx_SET (VOIDmode, new_src,
4335 GEN_INT (temp1)));
4336 emit_constant_insn (cond,
4337 gen_ashrsi3 (target, new_src,
4338 GEN_INT (set_sign_bit_copies - 1)));
4340 return 2;
4344 /* See if we can calculate the value as the difference between two
4345 valid immediates. */
4346 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4348 int topshift = clear_sign_bit_copies & ~1;
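/* The count is rounded down to an even value so that the 8-bit chunk
   computed below sits at an even rotation, as ARM immediates require
   (harmless for Thumb-2).  */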
4350 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4351 & (0xff000000 >> topshift));
4353 /* If temp1 is zero, then that means the 9 most significant
4354 bits of remainder were 1 and we've caused it to overflow.
4355 When topshift is 0 we don't need to do anything since we
4356 can borrow from 'bit 32'. */
4357 if (temp1 == 0 && topshift != 0)
4358 temp1 = 0x80000000 >> (topshift - 1);
4360 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4362 if (const_ok_for_arm (temp2))
4364 if (generate)
4366 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4367 emit_constant_insn (cond,
4368 gen_rtx_SET (VOIDmode, new_src,
4369 GEN_INT (temp1)));
4370 emit_constant_insn (cond,
4371 gen_addsi3 (target, new_src,
4372 GEN_INT (-temp2)));
4375 return 2;
4379 /* See if we can generate this by setting the bottom (or the top)
4380 16 bits, and then shifting these into the other half of the
4381 word. We only look for the simplest cases, to do more would cost
4382 too much. Be careful, however, not to generate this when the
4383 alternative would take fewer insns. */
4384 if (val & 0xffff0000)
4386 temp1 = remainder & 0xffff0000;
4387 temp2 = remainder & 0x0000ffff;
4389 /* Overlaps outside this range are best done using other methods. */
4390 for (i = 9; i < 24; i++)
4392 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4393 && !const_ok_for_arm (temp2))
4395 rtx new_src = (subtargets
4396 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4397 : target);
4398 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4399 source, subtargets, generate);
4400 source = new_src;
4401 if (generate)
4402 emit_constant_insn
4403 (cond,
4404 gen_rtx_SET
4405 (VOIDmode, target,
4406 gen_rtx_IOR (mode,
4407 gen_rtx_ASHIFT (mode, source,
4408 GEN_INT (i)),
4409 source)));
4410 return insns + 1;
4414 /* Don't duplicate cases already considered. */
4415 for (i = 17; i < 24; i++)
4417 if (((temp1 | (temp1 >> i)) == remainder)
4418 && !const_ok_for_arm (temp1))
4420 rtx new_src = (subtargets
4421 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4422 : target);
4423 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4424 source, subtargets, generate);
4425 source = new_src;
4426 if (generate)
4427 emit_constant_insn
4428 (cond,
4429 gen_rtx_SET (VOIDmode, target,
4430 gen_rtx_IOR
4431 (mode,
4432 gen_rtx_LSHIFTRT (mode, source,
4433 GEN_INT (i)),
4434 source)));
4435 return insns + 1;
4439 break;
4441 case IOR:
4442 case XOR:
4443 /* If we have IOR or XOR, and the constant can be loaded in a
4444 single instruction, and we can find a temporary to put it in,
4445 then this can be done in two instructions instead of 3-4. */
4446 if (subtargets
4447 /* TARGET can't be NULL if SUBTARGETS is 0 */
4448 || (reload_completed && !reg_mentioned_p (target, source)))
4450 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4452 if (generate)
4454 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4456 emit_constant_insn (cond,
4457 gen_rtx_SET (VOIDmode, sub,
4458 GEN_INT (val)));
4459 emit_constant_insn (cond,
4460 gen_rtx_SET (VOIDmode, target,
4461 gen_rtx_fmt_ee (code, mode,
4462 source, sub)));
4464 return 2;
4468 if (code == XOR)
4469 break;
4471 /* Convert.
4472 x = y | constant (where the constant consists of set_sign_bit_copies leading 1s
4473 followed by 0s, e.g. 0xfff00000)
4474 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4476 This can be done in 2 instructions by using shifts with mov or mvn.
4477 e.g. for
4478 x = x | 0xfff00000;
4479 we generate:
4480 mvn r0, r0, asl #12
4481 mvn r0, r0, lsr #12 */
4482 if (set_sign_bit_copies > 8
4483 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4485 if (generate)
4487 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4488 rtx shift = GEN_INT (set_sign_bit_copies);
4490 emit_constant_insn
4491 (cond,
4492 gen_rtx_SET (VOIDmode, sub,
4493 gen_rtx_NOT (mode,
4494 gen_rtx_ASHIFT (mode,
4495 source,
4496 shift))));
4497 emit_constant_insn
4498 (cond,
4499 gen_rtx_SET (VOIDmode, target,
4500 gen_rtx_NOT (mode,
4501 gen_rtx_LSHIFTRT (mode, sub,
4502 shift))));
4504 return 2;
4507 /* Convert
4508 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4510 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4512 E.g. for r0 = r0 | 0xfff we generate:
4513 mvn r0, r0, lsr #12
4514 mvn r0, r0, asl #12
4517 if (set_zero_bit_copies > 8
4518 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4520 if (generate)
4522 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4523 rtx shift = GEN_INT (set_zero_bit_copies);
4525 emit_constant_insn
4526 (cond,
4527 gen_rtx_SET (VOIDmode, sub,
4528 gen_rtx_NOT (mode,
4529 gen_rtx_LSHIFTRT (mode,
4530 source,
4531 shift))));
4532 emit_constant_insn
4533 (cond,
4534 gen_rtx_SET (VOIDmode, target,
4535 gen_rtx_NOT (mode,
4536 gen_rtx_ASHIFT (mode, sub,
4537 shift))));
4539 return 2;
4542 /* This will never be reached for Thumb2 because orn is a valid
4543 instruction. This is for Thumb1 and the ARM 32 bit cases.
4545 x = y | constant (such that ~constant is a valid constant)
4546 Transform this to
4547 x = ~(~y & ~constant).
4549 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4551 if (generate)
4553 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4554 emit_constant_insn (cond,
4555 gen_rtx_SET (VOIDmode, sub,
4556 gen_rtx_NOT (mode, source)));
4557 source = sub;
4558 if (subtargets)
4559 sub = gen_reg_rtx (mode);
4560 emit_constant_insn (cond,
4561 gen_rtx_SET (VOIDmode, sub,
4562 gen_rtx_AND (mode, source,
4563 GEN_INT (temp1))));
4564 emit_constant_insn (cond,
4565 gen_rtx_SET (VOIDmode, target,
4566 gen_rtx_NOT (mode, sub)));
4568 return 3;
4570 break;
4572 case AND:
4573 /* See if two shifts will do 2 or more insn's worth of work. */
4574 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4576 HOST_WIDE_INT shift_mask = ((0xffffffff
4577 << (32 - clear_sign_bit_copies))
4578 & 0xffffffff);
4580 if ((remainder | shift_mask) != 0xffffffff)
4582 HOST_WIDE_INT new_val
4583 = ARM_SIGN_EXTEND (remainder | shift_mask);
4585 if (generate)
4587 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4588 insns = arm_gen_constant (AND, SImode, cond, new_val,
4589 new_src, source, subtargets, 1);
4590 source = new_src;
4592 else
4594 rtx targ = subtargets ? NULL_RTX : target;
4595 insns = arm_gen_constant (AND, mode, cond, new_val,
4596 targ, source, subtargets, 0);
4600 if (generate)
4602 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4603 rtx shift = GEN_INT (clear_sign_bit_copies);
4605 emit_insn (gen_ashlsi3 (new_src, source, shift));
4606 emit_insn (gen_lshrsi3 (target, new_src, shift));
4609 return insns + 2;
4612 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4614 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4616 if ((remainder | shift_mask) != 0xffffffff)
4618 HOST_WIDE_INT new_val
4619 = ARM_SIGN_EXTEND (remainder | shift_mask);
4620 if (generate)
4622 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4624 insns = arm_gen_constant (AND, mode, cond, new_val,
4625 new_src, source, subtargets, 1);
4626 source = new_src;
4628 else
4630 rtx targ = subtargets ? NULL_RTX : target;
4632 insns = arm_gen_constant (AND, mode, cond, new_val,
4633 targ, source, subtargets, 0);
4637 if (generate)
4639 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4640 rtx shift = GEN_INT (clear_zero_bit_copies);
4642 emit_insn (gen_lshrsi3 (new_src, source, shift));
4643 emit_insn (gen_ashlsi3 (target, new_src, shift));
4646 return insns + 2;
4649 break;
4651 default:
4652 break;
4655 /* Calculate what the instruction sequences would be if we generated it
4656 normally, negated, or inverted. */
4657 if (code == AND)
4658 /* AND cannot be split into multiple insns, so invert and use BIC. */
4659 insns = 99;
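/* 99 is simply a cost high enough that the inverted sequence computed
   below is always chosen instead.  */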
4660 else
4661 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4663 if (can_negate)
4664 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4665 &neg_immediates);
4666 else
4667 neg_insns = 99;
4669 if (can_invert || final_invert)
4670 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4671 &inv_immediates);
4672 else
4673 inv_insns = 99;
4675 immediates = &pos_immediates;
4677 /* Is the negated immediate sequence more efficient? */
4678 if (neg_insns < insns && neg_insns <= inv_insns)
4680 insns = neg_insns;
4681 immediates = &neg_immediates;
4683 else
4684 can_negate = 0;
4686 /* Is the inverted immediate sequence more efficient?
4687 We must allow for an extra NOT instruction for XOR operations, although
4688 there is some chance that the final 'mvn' will get optimized later. */
4689 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4691 insns = inv_insns;
4692 immediates = &inv_immediates;
4694 else
4696 can_invert = 0;
4697 final_invert = 0;
4700 /* Now output the chosen sequence as instructions. */
4701 if (generate)
4703 for (i = 0; i < insns; i++)
4705 rtx new_src, temp1_rtx;
4707 temp1 = immediates->i[i];
4709 if (code == SET || code == MINUS)
4710 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4711 else if ((final_invert || i < (insns - 1)) && subtargets)
4712 new_src = gen_reg_rtx (mode);
4713 else
4714 new_src = target;
4716 if (can_invert)
4717 temp1 = ~temp1;
4718 else if (can_negate)
4719 temp1 = -temp1;
4721 temp1 = trunc_int_for_mode (temp1, mode);
4722 temp1_rtx = GEN_INT (temp1);
4724 if (code == SET)
4726 else if (code == MINUS)
4727 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4728 else
4729 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4731 emit_constant_insn (cond,
4732 gen_rtx_SET (VOIDmode, new_src,
4733 temp1_rtx));
4734 source = new_src;
4736 if (code == SET)
4738 can_negate = can_invert;
4739 can_invert = 0;
4740 code = PLUS;
4742 else if (code == MINUS)
4743 code = PLUS;
4747 if (final_invert)
4749 if (generate)
4750 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4751 gen_rtx_NOT (mode, source)));
4752 insns++;
4755 return insns;
4758 /* Canonicalize a comparison so that we are more likely to recognize it.
4759 This can be done for a few constant compares, where we can make the
4760 immediate value easier to load. */
4762 static void
4763 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4764 bool op0_preserve_value)
4766 machine_mode mode;
4767 unsigned HOST_WIDE_INT i, maxval;
4769 mode = GET_MODE (*op0);
4770 if (mode == VOIDmode)
4771 mode = GET_MODE (*op1);
4773 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
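/* MAXVAL is the largest positive value representable in MODE.  The cases
   below must not rewrite a comparison against it (e.g. GT -> GE on
   op1 + 1), since adding one would overflow.  */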
4775 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4776 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4777 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4778 for GTU/LEU in Thumb mode. */
4779 if (mode == DImode)
4782 if (*code == GT || *code == LE
4783 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4785 /* Missing comparison. First try to use an available
4786 comparison. */
4787 if (CONST_INT_P (*op1))
4789 i = INTVAL (*op1);
4790 switch (*code)
4792 case GT:
4793 case LE:
4794 if (i != maxval
4795 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4797 *op1 = GEN_INT (i + 1);
4798 *code = *code == GT ? GE : LT;
4799 return;
4801 break;
4802 case GTU:
4803 case LEU:
4804 if (i != ~((unsigned HOST_WIDE_INT) 0)
4805 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4807 *op1 = GEN_INT (i + 1);
4808 *code = *code == GTU ? GEU : LTU;
4809 return;
4811 break;
4812 default:
4813 gcc_unreachable ();
4817 /* If that did not work, reverse the condition. */
4818 if (!op0_preserve_value)
4820 std::swap (*op0, *op1);
4821 *code = (int)swap_condition ((enum rtx_code)*code);
4824 return;
4827 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4828 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4829 to facilitate possible combining with a cmp into 'ands'. */
4830 if (mode == SImode
4831 && GET_CODE (*op0) == ZERO_EXTEND
4832 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4833 && GET_MODE (XEXP (*op0, 0)) == QImode
4834 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4835 && subreg_lowpart_p (XEXP (*op0, 0))
4836 && *op1 == const0_rtx)
4837 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4838 GEN_INT (255));
4840 /* Comparisons smaller than DImode. Only adjust comparisons against
4841 an out-of-range constant. */
4842 if (!CONST_INT_P (*op1)
4843 || const_ok_for_arm (INTVAL (*op1))
4844 || const_ok_for_arm (- INTVAL (*op1)))
4845 return;
4847 i = INTVAL (*op1);
4849 switch (*code)
4851 case EQ:
4852 case NE:
4853 return;
4855 case GT:
4856 case LE:
4857 if (i != maxval
4858 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4860 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4861 *code = *code == GT ? GE : LT;
4862 return;
4864 break;
4866 case GE:
4867 case LT:
4868 if (i != ~maxval
4869 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4871 *op1 = GEN_INT (i - 1);
4872 *code = *code == GE ? GT : LE;
4873 return;
4875 break;
4877 case GTU:
4878 case LEU:
4879 if (i != ~((unsigned HOST_WIDE_INT) 0)
4880 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4882 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4883 *code = *code == GTU ? GEU : LTU;
4884 return;
4886 break;
4888 case GEU:
4889 case LTU:
4890 if (i != 0
4891 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4893 *op1 = GEN_INT (i - 1);
4894 *code = *code == GEU ? GTU : LEU;
4895 return;
4897 break;
4899 default:
4900 gcc_unreachable ();
4905 /* Define how to find the value returned by a function. */
4907 static rtx
4908 arm_function_value(const_tree type, const_tree func,
4909 bool outgoing ATTRIBUTE_UNUSED)
4911 machine_mode mode;
4912 int unsignedp ATTRIBUTE_UNUSED;
4913 rtx r ATTRIBUTE_UNUSED;
4915 mode = TYPE_MODE (type);
4917 if (TARGET_AAPCS_BASED)
4918 return aapcs_allocate_return_reg (mode, type, func);
4920 /* Promote integer types. */
4921 if (INTEGRAL_TYPE_P (type))
4922 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4924 /* Promotes small structs returned in a register to full-word size
4925 for big-endian AAPCS. */
4926 if (arm_return_in_msb (type))
4928 HOST_WIDE_INT size = int_size_in_bytes (type);
4929 if (size % UNITS_PER_WORD != 0)
4931 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4932 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4936 return arm_libcall_value_1 (mode);
4939 /* libcall hashtable helpers. */
4941 struct libcall_hasher : typed_noop_remove <rtx_def>
4943 typedef const rtx_def *value_type;
4944 typedef const rtx_def *compare_type;
4945 static inline hashval_t hash (const rtx_def *);
4946 static inline bool equal (const rtx_def *, const rtx_def *);
4947 static inline void remove (rtx_def *);
4950 inline bool
4951 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
4953 return rtx_equal_p (p1, p2);
4956 inline hashval_t
4957 libcall_hasher::hash (const rtx_def *p1)
4959 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4962 typedef hash_table<libcall_hasher> libcall_table_type;
4964 static void
4965 add_libcall (libcall_table_type *htab, rtx libcall)
4967 *htab->find_slot (libcall, INSERT) = libcall;
4970 static bool
4971 arm_libcall_uses_aapcs_base (const_rtx libcall)
4973 static bool init_done = false;
4974 static libcall_table_type *libcall_htab = NULL;
4976 if (!init_done)
4978 init_done = true;
4980 libcall_htab = new libcall_table_type (31);
4981 add_libcall (libcall_htab,
4982 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4983 add_libcall (libcall_htab,
4984 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4985 add_libcall (libcall_htab,
4986 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4987 add_libcall (libcall_htab,
4988 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4990 add_libcall (libcall_htab,
4991 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4992 add_libcall (libcall_htab,
4993 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4994 add_libcall (libcall_htab,
4995 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4996 add_libcall (libcall_htab,
4997 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4999 add_libcall (libcall_htab,
5000 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5001 add_libcall (libcall_htab,
5002 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5003 add_libcall (libcall_htab,
5004 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5005 add_libcall (libcall_htab,
5006 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5007 add_libcall (libcall_htab,
5008 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5009 add_libcall (libcall_htab,
5010 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5011 add_libcall (libcall_htab,
5012 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5013 add_libcall (libcall_htab,
5014 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5016 /* Values from double-precision helper functions are returned in core
5017 registers if the selected core only supports single-precision
5018 arithmetic, even if we are using the hard-float ABI. The same is
5019 true for single-precision helpers, but we will never be using the
5020 hard-float ABI on a CPU which doesn't support single-precision
5021 operations in hardware. */
5022 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5023 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5024 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5025 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5026 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5027 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5028 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5029 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5030 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5031 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5032 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5033 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5034 SFmode));
5035 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5036 DFmode));
5039 return libcall && libcall_htab->find (libcall) != NULL;
5042 static rtx
5043 arm_libcall_value_1 (machine_mode mode)
5045 if (TARGET_AAPCS_BASED)
5046 return aapcs_libcall_value (mode);
5047 else if (TARGET_IWMMXT_ABI
5048 && arm_vector_mode_supported_p (mode))
5049 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5050 else
5051 return gen_rtx_REG (mode, ARG_REGISTER (1));
5054 /* Define how to find the value returned by a library function
5055 assuming the value has mode MODE. */
5057 static rtx
5058 arm_libcall_value (machine_mode mode, const_rtx libcall)
5060 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5061 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5063 /* The following libcalls return their result in integer registers,
5064 even though they return a floating point value. */
5065 if (arm_libcall_uses_aapcs_base (libcall))
5066 return gen_rtx_REG (mode, ARG_REGISTER(1));
5070 return arm_libcall_value_1 (mode);
5073 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5075 static bool
5076 arm_function_value_regno_p (const unsigned int regno)
5078 if (regno == ARG_REGISTER (1)
5079 || (TARGET_32BIT
5080 && TARGET_AAPCS_BASED
5081 && TARGET_VFP
5082 && TARGET_HARD_FLOAT
5083 && regno == FIRST_VFP_REGNUM)
5084 || (TARGET_IWMMXT_ABI
5085 && regno == FIRST_IWMMXT_REGNUM))
5086 return true;
5088 return false;
5091 /* Determine the amount of memory needed to store the possible return
5092 registers of an untyped call. */
5094 arm_apply_result_size (void)
5096 int size = 16;
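/* 16 bytes covers the four core result registers r0-r3; the additions
   below allow for results that may be returned in VFP or iWMMXt
   registers under those ABIs.  */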
5098 if (TARGET_32BIT)
5100 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5101 size += 32;
5102 if (TARGET_IWMMXT_ABI)
5103 size += 8;
5106 return size;
5109 /* Decide whether TYPE should be returned in memory (true)
5110 or in a register (false). FNTYPE is the type of the function making
5111 the call. */
5112 static bool
5113 arm_return_in_memory (const_tree type, const_tree fntype)
5115 HOST_WIDE_INT size;
5117 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5119 if (TARGET_AAPCS_BASED)
5121 /* Simple, non-aggregate types (i.e. not including vectors and
5122 complex) are always returned in a register (or registers).
5123 We don't care about which register here, so we can short-cut
5124 some of the detail. */
5125 if (!AGGREGATE_TYPE_P (type)
5126 && TREE_CODE (type) != VECTOR_TYPE
5127 && TREE_CODE (type) != COMPLEX_TYPE)
5128 return false;
5130 /* Any return value that is no larger than one word can be
5131 returned in r0. */
5132 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5133 return false;
5135 /* Check any available co-processors to see if they accept the
5136 type as a register candidate (VFP, for example, can return
5137 some aggregates in consecutive registers). These aren't
5138 available if the call is variadic. */
5139 if (aapcs_select_return_coproc (type, fntype) >= 0)
5140 return false;
5142 /* Vector values should be returned using ARM registers, not
5143 memory (unless they're over 16 bytes, which will break since
5144 we only have four call-clobbered registers to play with). */
5145 if (TREE_CODE (type) == VECTOR_TYPE)
5146 return (size < 0 || size > (4 * UNITS_PER_WORD));
5148 /* The rest go in memory. */
5149 return true;
5152 if (TREE_CODE (type) == VECTOR_TYPE)
5153 return (size < 0 || size > (4 * UNITS_PER_WORD));
5155 if (!AGGREGATE_TYPE_P (type) &&
5156 (TREE_CODE (type) != VECTOR_TYPE))
5157 /* All simple types are returned in registers. */
5158 return false;
5160 if (arm_abi != ARM_ABI_APCS)
5162 /* ATPCS and later return aggregate types in memory only if they are
5163 larger than a word (or are variable size). */
5164 return (size < 0 || size > UNITS_PER_WORD);
5167 /* For the arm-wince targets we choose to be compatible with Microsoft's
5168 ARM and Thumb compilers, which always return aggregates in memory. */
5169 #ifndef ARM_WINCE
5170 /* All structures/unions bigger than one word are returned in memory.
5171 Also catch the case where int_size_in_bytes returns -1. In this case
5172 the aggregate is either huge or of variable size, and in either case
5173 we will want to return it via memory and not in a register. */
5174 if (size < 0 || size > UNITS_PER_WORD)
5175 return true;
5177 if (TREE_CODE (type) == RECORD_TYPE)
5179 tree field;
5181 /* For a struct the APCS says that we only return in a register
5182 if the type is 'integer like' and every addressable element
5183 has an offset of zero. For practical purposes this means
5184 that the structure can have at most one non bit-field element
5185 and that this element must be the first one in the structure. */
5187 /* Find the first field, ignoring non FIELD_DECL things which will
5188 have been created by C++. */
5189 for (field = TYPE_FIELDS (type);
5190 field && TREE_CODE (field) != FIELD_DECL;
5191 field = DECL_CHAIN (field))
5192 continue;
5194 if (field == NULL)
5195 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5197 /* Check that the first field is valid for returning in a register. */
5199 /* ... Floats are not allowed */
5200 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5201 return true;
5203 /* ... Aggregates that are not themselves valid for returning in
5204 a register are not allowed. */
5205 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5206 return true;
5208 /* Now check the remaining fields, if any. Only bitfields are allowed,
5209 since they are not addressable. */
5210 for (field = DECL_CHAIN (field);
5211 field;
5212 field = DECL_CHAIN (field))
5214 if (TREE_CODE (field) != FIELD_DECL)
5215 continue;
5217 if (!DECL_BIT_FIELD_TYPE (field))
5218 return true;
5221 return false;
5224 if (TREE_CODE (type) == UNION_TYPE)
5226 tree field;
5228 /* Unions can be returned in registers if every element is
5229 integral, or can be returned in an integer register. */
5230 for (field = TYPE_FIELDS (type);
5231 field;
5232 field = DECL_CHAIN (field))
5234 if (TREE_CODE (field) != FIELD_DECL)
5235 continue;
5237 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5238 return true;
5240 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5241 return true;
5244 return false;
5246 #endif /* not ARM_WINCE */
5248 /* Return all other types in memory. */
5249 return true;
5252 const struct pcs_attribute_arg
5254 const char *arg;
5255 enum arm_pcs value;
5256 } pcs_attribute_args[] =
5258 {"aapcs", ARM_PCS_AAPCS},
5259 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5260 #if 0
5261 /* We could recognize these, but changes would be needed elsewhere
5262 * to implement them. */
5263 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5264 {"atpcs", ARM_PCS_ATPCS},
5265 {"apcs", ARM_PCS_APCS},
5266 #endif
5267 {NULL, ARM_PCS_UNKNOWN}
5270 static enum arm_pcs
5271 arm_pcs_from_attribute (tree attr)
5273 const struct pcs_attribute_arg *ptr;
5274 const char *arg;
5276 /* Get the value of the argument. */
5277 if (TREE_VALUE (attr) == NULL_TREE
5278 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5279 return ARM_PCS_UNKNOWN;
5281 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5283 /* Check it against the list of known arguments. */
5284 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5285 if (streq (arg, ptr->arg))
5286 return ptr->value;
5288 /* An unrecognized PCS variant. */
5289 return ARM_PCS_UNKNOWN;
5292 /* Get the PCS variant to use for this call. TYPE is the function's type
5293 specification, DECL is the specific declaration. DECL may be null if
5294 the call could be indirect or if this is a library call. */
5295 static enum arm_pcs
5296 arm_get_pcs_model (const_tree type, const_tree decl)
5298 bool user_convention = false;
5299 enum arm_pcs user_pcs = arm_pcs_default;
5300 tree attr;
5302 gcc_assert (type);
5304 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5305 if (attr)
5307 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5308 user_convention = true;
5311 if (TARGET_AAPCS_BASED)
5313 /* Detect varargs functions. These always use the base rules
5314 (no argument is ever a candidate for a co-processor
5315 register). */
5316 bool base_rules = stdarg_p (type);
5318 if (user_convention)
5320 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5321 sorry ("non-AAPCS derived PCS variant");
5322 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5323 error ("variadic functions must use the base AAPCS variant");
5326 if (base_rules)
5327 return ARM_PCS_AAPCS;
5328 else if (user_convention)
5329 return user_pcs;
5330 else if (decl && flag_unit_at_a_time)
5332 /* Local functions never leak outside this compilation unit,
5333 so we are free to use whatever conventions are
5334 appropriate. */
5335 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5336 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5337 if (i && i->local)
5338 return ARM_PCS_AAPCS_LOCAL;
5341 else if (user_convention && user_pcs != arm_pcs_default)
5342 sorry ("PCS variant");
5344 /* For everything else we use the target's default. */
5345 return arm_pcs_default;
5349 static void
5350 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5351 const_tree fntype ATTRIBUTE_UNUSED,
5352 rtx libcall ATTRIBUTE_UNUSED,
5353 const_tree fndecl ATTRIBUTE_UNUSED)
5355 /* Record the unallocated VFP registers. */
5356 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5357 pcum->aapcs_vfp_reg_alloc = 0;
5360 /* Walk down the type tree of TYPE counting consecutive base elements.
5361 If *MODEP is VOIDmode, then set it to the first valid floating point
5362 type. If a non-floating point type is found, or if a floating point
5363 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5364 otherwise return the count in the sub-tree. */
5365 static int
5366 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5368 machine_mode mode;
5369 HOST_WIDE_INT size;
5371 switch (TREE_CODE (type))
5373 case REAL_TYPE:
5374 mode = TYPE_MODE (type);
5375 if (mode != DFmode && mode != SFmode)
5376 return -1;
5378 if (*modep == VOIDmode)
5379 *modep = mode;
5381 if (*modep == mode)
5382 return 1;
5384 break;
5386 case COMPLEX_TYPE:
5387 mode = TYPE_MODE (TREE_TYPE (type));
5388 if (mode != DFmode && mode != SFmode)
5389 return -1;
5391 if (*modep == VOIDmode)
5392 *modep = mode;
5394 if (*modep == mode)
5395 return 2;
5397 break;
5399 case VECTOR_TYPE:
5400 /* Use V2SImode and V4SImode as representatives of all 64-bit
5401 and 128-bit vector types, whether or not those modes are
5402 supported with the present options. */
5403 size = int_size_in_bytes (type);
5404 switch (size)
5406 case 8:
5407 mode = V2SImode;
5408 break;
5409 case 16:
5410 mode = V4SImode;
5411 break;
5412 default:
5413 return -1;
5416 if (*modep == VOIDmode)
5417 *modep = mode;
5419 /* Vector modes are considered to be opaque: two vectors are
5420 equivalent for the purposes of being homogeneous aggregates
5421 if they are the same size. */
5422 if (*modep == mode)
5423 return 1;
5425 break;
5427 case ARRAY_TYPE:
5429 int count;
5430 tree index = TYPE_DOMAIN (type);
5432 /* Can't handle incomplete types nor sizes that are not
5433 fixed. */
5434 if (!COMPLETE_TYPE_P (type)
5435 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5436 return -1;
5438 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5439 if (count == -1
5440 || !index
5441 || !TYPE_MAX_VALUE (index)
5442 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5443 || !TYPE_MIN_VALUE (index)
5444 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5445 || count < 0)
5446 return -1;
5448 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5449 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5451 /* There must be no padding. */
5452 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5453 return -1;
5455 return count;
5458 case RECORD_TYPE:
5460 int count = 0;
5461 int sub_count;
5462 tree field;
5464 /* Can't handle incomplete types nor sizes that are not
5465 fixed. */
5466 if (!COMPLETE_TYPE_P (type)
5467 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5468 return -1;
5470 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5472 if (TREE_CODE (field) != FIELD_DECL)
5473 continue;
5475 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5476 if (sub_count < 0)
5477 return -1;
5478 count += sub_count;
5481 /* There must be no padding. */
5482 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5483 return -1;
5485 return count;
5488 case UNION_TYPE:
5489 case QUAL_UNION_TYPE:
5491 /* These aren't very interesting except in a degenerate case. */
5492 int count = 0;
5493 int sub_count;
5494 tree field;
5496 /* Can't handle incomplete types nor sizes that are not
5497 fixed. */
5498 if (!COMPLETE_TYPE_P (type)
5499 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5500 return -1;
5502 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5504 if (TREE_CODE (field) != FIELD_DECL)
5505 continue;
5507 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5508 if (sub_count < 0)
5509 return -1;
5510 count = count > sub_count ? count : sub_count;
5513 /* There must be no padding. */
5514 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5515 return -1;
5517 return count;
5520 default:
5521 break;
5524 return -1;
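/* A worked illustration of the counting rules above (type names are
   hypothetical, not taken from this file):

     struct s1 { float x, y, z; };        counts as 3, *modep == SFmode
     struct s2 { double d[2]; };          counts as 2, *modep == DFmode
     _Complex double                      counts as 2, *modep == DFmode
     struct s3 { float f; double d; };    returns -1 (mixed base modes)

   A mismatched base mode, an array without known bounds, or trailing
   padding makes the whole type ineligible, and the caller then falls
   back to the base PCS rules.  */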
5527 /* Return true if PCS_VARIANT should use VFP registers. */
5528 static bool
5529 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5531 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5533 static bool seen_thumb1_vfp = false;
5535 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5537 sorry ("Thumb-1 hard-float VFP ABI");
5538 /* sorry() is not immediately fatal, so only display this once. */
5539 seen_thumb1_vfp = true;
5542 return true;
5545 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5546 return false;
5548 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5549 (TARGET_VFP_DOUBLE || !is_double));
5552 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5553 suitable for passing or returning in VFP registers for the PCS
5554 variant selected. If it is, then *BASE_MODE is updated to contain
5555 a machine mode describing each element of the argument's type and
5556 *COUNT to hold the number of such elements. */
5557 static bool
5558 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5559 machine_mode mode, const_tree type,
5560 machine_mode *base_mode, int *count)
5562 machine_mode new_mode = VOIDmode;
5564 /* If we have the type information, prefer that to working things
5565 out from the mode. */
5566 if (type)
5568 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5570 if (ag_count > 0 && ag_count <= 4)
5571 *count = ag_count;
5572 else
5573 return false;
5575 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5576 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5577 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5579 *count = 1;
5580 new_mode = mode;
5582 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5584 *count = 2;
5585 new_mode = (mode == DCmode ? DFmode : SFmode);
5587 else
5588 return false;
5591 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5592 return false;
5594 *base_mode = new_mode;
5595 return true;
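/* Sketch of the outcomes of the candidate test above, assuming the
   usual type-to-mode mappings (examples only):

     struct { float f[4]; }    ->  *count = 4, *base_mode = SFmode
     struct { float f[5]; }    ->  rejected (more than 4 base elements)
     _Complex float            ->  *count = 2, *base_mode = SFmode

   When no type is available, scalar float and vector modes count as a
   single element and complex float modes as two.  */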
5598 static bool
5599 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5600 machine_mode mode, const_tree type)
5602 int count ATTRIBUTE_UNUSED;
5603 machine_mode ag_mode ATTRIBUTE_UNUSED;
5605 if (!use_vfp_abi (pcs_variant, false))
5606 return false;
5607 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5608 &ag_mode, &count);
5611 static bool
5612 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5613 const_tree type)
5615 if (!use_vfp_abi (pcum->pcs_variant, false))
5616 return false;
5618 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5619 &pcum->aapcs_vfp_rmode,
5620 &pcum->aapcs_vfp_rcount);
5623 static bool
5624 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5625 const_tree type ATTRIBUTE_UNUSED)
5627 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5628 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5629 int regno;
5631 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5632 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5634 pcum->aapcs_vfp_reg_alloc = mask << regno;
5635 if (mode == BLKmode
5636 || (mode == TImode && ! TARGET_NEON)
5637 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5639 int i;
5640 int rcount = pcum->aapcs_vfp_rcount;
5641 int rshift = shift;
5642 machine_mode rmode = pcum->aapcs_vfp_rmode;
5643 rtx par;
5644 if (!TARGET_NEON)
5646 /* Avoid using unsupported vector modes. */
5647 if (rmode == V2SImode)
5648 rmode = DImode;
5649 else if (rmode == V4SImode)
5651 rmode = DImode;
5652 rcount *= 2;
5653 rshift /= 2;
5656 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5657 for (i = 0; i < rcount; i++)
5659 rtx tmp = gen_rtx_REG (rmode,
5660 FIRST_VFP_REGNUM + regno + i * rshift);
5661 tmp = gen_rtx_EXPR_LIST
5662 (VOIDmode, tmp,
5663 GEN_INT (i * GET_MODE_SIZE (rmode)));
5664 XVECEXP (par, 0, i) = tmp;
5667 pcum->aapcs_reg = par;
5669 else
5670 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5671 return true;
5673 return false;
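/* Illustration of the mask arithmetic above: the free-register bitmap
   covers the sixteen argument registers s0-s15.  For a homogeneous
   aggregate of two doubles, shift == 2 (one DFmode value spans two
   SFmode slots) and rcount == 2, so mask == 0xf and the loop searches
   for four consecutive free S-register slots at an even index; if
   s0-s3 are free the argument is placed in d0-d1 and those bits are
   recorded in aapcs_vfp_reg_alloc.  */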
5676 static rtx
5677 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5678 machine_mode mode,
5679 const_tree type ATTRIBUTE_UNUSED)
5681 if (!use_vfp_abi (pcs_variant, false))
5682 return NULL;
5684 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5686 int count;
5687 machine_mode ag_mode;
5688 int i;
5689 rtx par;
5690 int shift;
5692 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5693 &ag_mode, &count);
5695 if (!TARGET_NEON)
5697 if (ag_mode == V2SImode)
5698 ag_mode = DImode;
5699 else if (ag_mode == V4SImode)
5701 ag_mode = DImode;
5702 count *= 2;
5705 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5706 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5707 for (i = 0; i < count; i++)
5709 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5710 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5711 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5712 XVECEXP (par, 0, i) = tmp;
5715 return par;
5718 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5721 static void
5722 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5723 machine_mode mode ATTRIBUTE_UNUSED,
5724 const_tree type ATTRIBUTE_UNUSED)
5726 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5727 pcum->aapcs_vfp_reg_alloc = 0;
5728 return;
5731 #define AAPCS_CP(X) \
5733 aapcs_ ## X ## _cum_init, \
5734 aapcs_ ## X ## _is_call_candidate, \
5735 aapcs_ ## X ## _allocate, \
5736 aapcs_ ## X ## _is_return_candidate, \
5737 aapcs_ ## X ## _allocate_return_reg, \
5738 aapcs_ ## X ## _advance \
5741 /* Table of co-processors that can be used to pass arguments in
5742 registers. Ideally no argument should be a candidate for more than
5743 one co-processor table entry, but the table is processed in order
5744 and stops after the first match. If that entry then fails to put
5745 the argument into a co-processor register, the argument will go on
5746 the stack. */
5747 static struct
5749 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5750 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5752 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5753 BLKmode) is a candidate for this co-processor's registers; this
5754 function should ignore any position-dependent state in
5755 CUMULATIVE_ARGS and only use call-type dependent information. */
5756 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5758 /* Return true if the argument does get a co-processor register; it
5759 should set aapcs_reg to an RTX of the register allocated as is
5760 required for a return from FUNCTION_ARG. */
5761 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5763 /* Return true if a result of mode MODE (or type TYPE if MODE is
5764 BLKmode) can be returned in this co-processor's registers. */
5765 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5767 /* Allocate and return an RTX element to hold the return type of a
5768 call; this routine must not fail and will only be called if
5769 is_return_candidate returned true with the same parameters. */
5770 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5772 /* Finish processing this argument and prepare to start processing
5773 the next one. */
5774 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5775 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5777 AAPCS_CP(vfp)
5780 #undef AAPCS_CP
5782 static int
5783 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5784 const_tree type)
5786 int i;
5788 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5789 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5790 return i;
5792 return -1;
5795 static int
5796 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5798 /* We aren't passed a decl, so we can't check that a call is local.
5799 However, it isn't clear that that would be a win anyway, since it
5800 might limit some tail-calling opportunities. */
5801 enum arm_pcs pcs_variant;
5803 if (fntype)
5805 const_tree fndecl = NULL_TREE;
5807 if (TREE_CODE (fntype) == FUNCTION_DECL)
5809 fndecl = fntype;
5810 fntype = TREE_TYPE (fntype);
5813 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5815 else
5816 pcs_variant = arm_pcs_default;
5818 if (pcs_variant != ARM_PCS_AAPCS)
5820 int i;
5822 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5823 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5824 TYPE_MODE (type),
5825 type))
5826 return i;
5828 return -1;
5831 static rtx
5832 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5833 const_tree fntype)
5835 /* We aren't passed a decl, so we can't check that a call is local.
5836 However, it isn't clear that that would be a win anyway, since it
5837 might limit some tail-calling opportunities. */
5838 enum arm_pcs pcs_variant;
5839 int unsignedp ATTRIBUTE_UNUSED;
5841 if (fntype)
5843 const_tree fndecl = NULL_TREE;
5845 if (TREE_CODE (fntype) == FUNCTION_DECL)
5847 fndecl = fntype;
5848 fntype = TREE_TYPE (fntype);
5851 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5853 else
5854 pcs_variant = arm_pcs_default;
5856 /* Promote integer types. */
5857 if (type && INTEGRAL_TYPE_P (type))
5858 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5860 if (pcs_variant != ARM_PCS_AAPCS)
5862 int i;
5864 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5865 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5866 type))
5867 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5868 mode, type);
5871 /* Promotes small structs returned in a register to full-word size
5872 for big-endian AAPCS. */
5873 if (type && arm_return_in_msb (type))
5875 HOST_WIDE_INT size = int_size_in_bytes (type);
5876 if (size % UNITS_PER_WORD != 0)
5878 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5879 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5883 return gen_rtx_REG (mode, R0_REGNUM);
5886 static rtx
5887 aapcs_libcall_value (machine_mode mode)
5889 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5890 && GET_MODE_SIZE (mode) <= 4)
5891 mode = SImode;
5893 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5896 /* Lay out a function argument using the AAPCS rules. The rule
5897 numbers referred to here are those in the AAPCS. */
5898 static void
5899 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5900 const_tree type, bool named)
5902 int nregs, nregs2;
5903 int ncrn;
5905 /* We only need to do this once per argument. */
5906 if (pcum->aapcs_arg_processed)
5907 return;
5909 pcum->aapcs_arg_processed = true;
5911 /* Special case: if named is false then we are handling an incoming
5912 anonymous argument which is on the stack. */
5913 if (!named)
5914 return;
5916 /* Is this a potential co-processor register candidate? */
5917 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5919 int slot = aapcs_select_call_coproc (pcum, mode, type);
5920 pcum->aapcs_cprc_slot = slot;
5922 /* We don't have to apply any of the rules from part B of the
5923 preparation phase, these are handled elsewhere in the
5924 compiler. */
5926 if (slot >= 0)
5928 /* A Co-processor register candidate goes either in its own
5929 class of registers or on the stack. */
5930 if (!pcum->aapcs_cprc_failed[slot])
5932 /* C1.cp - Try to allocate the argument to co-processor
5933 registers. */
5934 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5935 return;
5937 /* C2.cp - Put the argument on the stack and note that we
5938 can't assign any more candidates in this slot. We also
5939 need to note that we have allocated stack space, so that
5940 we won't later try to split a non-cprc candidate between
5941 core registers and the stack. */
5942 pcum->aapcs_cprc_failed[slot] = true;
5943 pcum->can_split = false;
5946 /* We didn't get a register, so this argument goes on the
5947 stack. */
5948 gcc_assert (pcum->can_split == false);
5949 return;
5953 /* C3 - For double-word aligned arguments, round the NCRN up to the
5954 next even number. */
5955 ncrn = pcum->aapcs_ncrn;
5956 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5957 ncrn++;
5959 nregs = ARM_NUM_REGS2(mode, type);
5961 /* Sigh, this test should really assert that nregs > 0, but a GCC
5962 extension allows empty structs and then gives them empty size; it
5963 then allows such a structure to be passed by value. For some of
5964 the code below we have to pretend that such an argument has
5965 non-zero size so that we 'locate' it correctly either in
5966 registers or on the stack. */
5967 gcc_assert (nregs >= 0);
5969 nregs2 = nregs ? nregs : 1;
5971 /* C4 - Argument fits entirely in core registers. */
5972 if (ncrn + nregs2 <= NUM_ARG_REGS)
5974 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5975 pcum->aapcs_next_ncrn = ncrn + nregs;
5976 return;
5979 /* C5 - Some core registers left and there are no arguments already
5980 on the stack: split this argument between the remaining core
5981 registers and the stack. */
5982 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5984 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5985 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5986 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5987 return;
5990 /* C6 - NCRN is set to 4. */
5991 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5993 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5994 return;
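/* A worked example of rules C3-C5 for the base (integer) variant,
   illustrative only.  For

     void f (int a, double b, int c);

   a goes in r0 (C4).  b needs doubleword alignment, so the NCRN is
   rounded up from 1 to 2 (C3) and b occupies r2+r3 (C4).  c then finds
   no core registers left, the NCRN becomes 4 and c goes on the stack
   (C6, C7/C8).  Had b instead been a 16-byte structure of ints, rule
   C5 would have split it: r1-r3 plus four bytes of stack, with
   aapcs_partial recording the 12 bytes passed in registers.  */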
5997 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5998 for a call to a function whose data type is FNTYPE.
5999 For a library call, FNTYPE is NULL. */
6000 void
6001 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6002 rtx libname,
6003 tree fndecl ATTRIBUTE_UNUSED)
6005 /* Long call handling. */
6006 if (fntype)
6007 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6008 else
6009 pcum->pcs_variant = arm_pcs_default;
6011 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6013 if (arm_libcall_uses_aapcs_base (libname))
6014 pcum->pcs_variant = ARM_PCS_AAPCS;
6016 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6017 pcum->aapcs_reg = NULL_RTX;
6018 pcum->aapcs_partial = 0;
6019 pcum->aapcs_arg_processed = false;
6020 pcum->aapcs_cprc_slot = -1;
6021 pcum->can_split = true;
6023 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6025 int i;
6027 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6029 pcum->aapcs_cprc_failed[i] = false;
6030 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6033 return;
6036 /* Legacy ABIs */
6038 /* On the ARM, the offset starts at 0. */
6039 pcum->nregs = 0;
6040 pcum->iwmmxt_nregs = 0;
6041 pcum->can_split = true;
6043 /* Varargs vectors are treated the same as long long.
6044 named_count avoids having to change the way arm handles 'named'. */
6045 pcum->named_count = 0;
6046 pcum->nargs = 0;
6048 if (TARGET_REALLY_IWMMXT && fntype)
6050 tree fn_arg;
6052 for (fn_arg = TYPE_ARG_TYPES (fntype);
6053 fn_arg;
6054 fn_arg = TREE_CHAIN (fn_arg))
6055 pcum->named_count += 1;
6057 if (! pcum->named_count)
6058 pcum->named_count = INT_MAX;
6062 /* Return true if mode/type need doubleword alignment. */
6063 static bool
6064 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6066 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6067 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6071 /* Determine where to put an argument to a function.
6072 Value is zero to push the argument on the stack,
6073 or a hard register in which to store the argument.
6075 MODE is the argument's machine mode.
6076 TYPE is the data type of the argument (as a tree).
6077 This is null for libcalls where that information may
6078 not be available.
6079 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6080 the preceding args and about the function being called.
6081 NAMED is nonzero if this argument is a named parameter
6082 (otherwise it is an extra parameter matching an ellipsis).
6084 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6085 other arguments are passed on the stack. If (NAMED == 0) (which happens
6086 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6087 defined), say it is passed in the stack (function_prologue will
6088 indeed make it pass in the stack if necessary). */
6090 static rtx
6091 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6092 const_tree type, bool named)
6094 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6095 int nregs;
6097 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6098 a call insn (op3 of a call_value insn). */
6099 if (mode == VOIDmode)
6100 return const0_rtx;
6102 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6104 aapcs_layout_arg (pcum, mode, type, named);
6105 return pcum->aapcs_reg;
6108 /* Varargs vectors are treated the same as long long.
6109 named_count avoids having to change the way arm handles 'named'. */
6110 if (TARGET_IWMMXT_ABI
6111 && arm_vector_mode_supported_p (mode)
6112 && pcum->named_count > pcum->nargs + 1)
6114 if (pcum->iwmmxt_nregs <= 9)
6115 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6116 else
6118 pcum->can_split = false;
6119 return NULL_RTX;
6123 /* Put doubleword aligned quantities in even register pairs. */
6124 if (pcum->nregs & 1
6125 && ARM_DOUBLEWORD_ALIGN
6126 && arm_needs_doubleword_align (mode, type))
6127 pcum->nregs++;
6129 /* Only allow splitting an arg between regs and memory if all preceding
6130 args were allocated to regs. For args passed by reference we only count
6131 the reference pointer. */
6132 if (pcum->can_split)
6133 nregs = 1;
6134 else
6135 nregs = ARM_NUM_REGS2 (mode, type);
6137 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6138 return NULL_RTX;
6140 return gen_rtx_REG (mode, pcum->nregs);
6143 static unsigned int
6144 arm_function_arg_boundary (machine_mode mode, const_tree type)
6146 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6147 ? DOUBLEWORD_ALIGNMENT
6148 : PARM_BOUNDARY);
6151 static int
6152 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6153 tree type, bool named)
6155 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6156 int nregs = pcum->nregs;
6158 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6160 aapcs_layout_arg (pcum, mode, type, named);
6161 return pcum->aapcs_partial;
6164 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6165 return 0;
6167 if (NUM_ARG_REGS > nregs
6168 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6169 && pcum->can_split)
6170 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6172 return 0;
6175 /* Update the data in PCUM to advance over an argument
6176 of mode MODE and data type TYPE.
6177 (TYPE is null for libcalls where that information may not be available.) */
6179 static void
6180 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6181 const_tree type, bool named)
6183 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6185 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6187 aapcs_layout_arg (pcum, mode, type, named);
6189 if (pcum->aapcs_cprc_slot >= 0)
6191 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6192 type);
6193 pcum->aapcs_cprc_slot = -1;
6196 /* Generic stuff. */
6197 pcum->aapcs_arg_processed = false;
6198 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6199 pcum->aapcs_reg = NULL_RTX;
6200 pcum->aapcs_partial = 0;
6202 else
6204 pcum->nargs += 1;
6205 if (arm_vector_mode_supported_p (mode)
6206 && pcum->named_count > pcum->nargs
6207 && TARGET_IWMMXT_ABI)
6208 pcum->iwmmxt_nregs += 1;
6209 else
6210 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6214 /* Variable sized types are passed by reference. This is a GCC
6215 extension to the ARM ABI. */
6217 static bool
6218 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6219 machine_mode mode ATTRIBUTE_UNUSED,
6220 const_tree type, bool named ATTRIBUTE_UNUSED)
6222 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
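/* Illustration (GNU C sketch, names hypothetical): the test above
   fires for variably sized types, e.g.

     void g (int n)
     {
       struct vla { char buf[n]; } v;    TYPE_SIZE is not INTEGER_CST,
       h (v);                            so v is passed by reference
     }

   Fixed-size aggregates, however large, are still passed by value
   under the rules above.  */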
6225 /* Encode the current state of the #pragma [no_]long_calls. */
6226 typedef enum
6228 OFF, /* No #pragma [no_]long_calls is in effect. */
6229 LONG, /* #pragma long_calls is in effect. */
6230 SHORT /* #pragma no_long_calls is in effect. */
6231 } arm_pragma_enum;
6233 static arm_pragma_enum arm_pragma_long_calls = OFF;
6235 void
6236 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6238 arm_pragma_long_calls = LONG;
6241 void
6242 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6244 arm_pragma_long_calls = SHORT;
6247 void
6248 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6250 arm_pragma_long_calls = OFF;
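/* Illustration of the pragmas handled above (function names are
   hypothetical):

     #pragma long_calls
     void far_func (void);       given the long_call attribute
     #pragma no_long_calls
     void near_func (void);      given the short_call attribute
     #pragma long_calls_off
     void plain_func (void);     back to the command-line default

   The attribute is attached by arm_set_default_type_attributes,
   defined later in this file.  */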
6253 /* Handle an attribute requiring a FUNCTION_DECL;
6254 arguments as in struct attribute_spec.handler. */
6255 static tree
6256 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6257 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6259 if (TREE_CODE (*node) != FUNCTION_DECL)
6261 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6262 name);
6263 *no_add_attrs = true;
6266 return NULL_TREE;
6269 /* Handle an "interrupt" or "isr" attribute;
6270 arguments as in struct attribute_spec.handler. */
6271 static tree
6272 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6273 bool *no_add_attrs)
6275 if (DECL_P (*node))
6277 if (TREE_CODE (*node) != FUNCTION_DECL)
6279 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6280 name);
6281 *no_add_attrs = true;
6283 /* FIXME: the argument if any is checked for type attributes;
6284 should it be checked for decl ones? */
6286 else
6288 if (TREE_CODE (*node) == FUNCTION_TYPE
6289 || TREE_CODE (*node) == METHOD_TYPE)
6291 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6293 warning (OPT_Wattributes, "%qE attribute ignored",
6294 name);
6295 *no_add_attrs = true;
6298 else if (TREE_CODE (*node) == POINTER_TYPE
6299 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6300 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6301 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6303 *node = build_variant_type_copy (*node);
6304 TREE_TYPE (*node) = build_type_attribute_variant
6305 (TREE_TYPE (*node),
6306 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6307 *no_add_attrs = true;
6309 else
6311 /* Possibly pass this attribute on from the type to a decl. */
6312 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6313 | (int) ATTR_FLAG_FUNCTION_NEXT
6314 | (int) ATTR_FLAG_ARRAY_NEXT))
6316 *no_add_attrs = true;
6317 return tree_cons (name, args, NULL_TREE);
6319 else
6321 warning (OPT_Wattributes, "%qE attribute ignored",
6322 name);
6327 return NULL_TREE;
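/* Typical use of the attribute handled above (illustrative):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value recognises the strings "IRQ", "FIQ", "SWI", "ABORT"
   and "UNDEF" (and their lower-case forms); an unrecognised argument
   yields ARM_FT_UNKNOWN and the attribute is ignored with a warning.  */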
6330 /* Handle a "pcs" attribute; arguments as in struct
6331 attribute_spec.handler. */
6332 static tree
6333 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6334 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6336 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6338 warning (OPT_Wattributes, "%qE attribute ignored", name);
6339 *no_add_attrs = true;
6341 return NULL_TREE;
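/* Typical use of the "pcs" attribute (illustrative):

     double dot (double x, double y) __attribute__ ((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute accepts "aapcs" and "aapcs-vfp"; any other
   string maps to ARM_PCS_UNKNOWN and is ignored with a warning, as
   coded above.  */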
6344 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6345 /* Handle the "notshared" attribute. This attribute is another way of
6346 requesting hidden visibility. ARM's compiler supports
6347 "__declspec(notshared)"; we support the same thing via an
6348 attribute. */
6350 static tree
6351 arm_handle_notshared_attribute (tree *node,
6352 tree name ATTRIBUTE_UNUSED,
6353 tree args ATTRIBUTE_UNUSED,
6354 int flags ATTRIBUTE_UNUSED,
6355 bool *no_add_attrs)
6357 tree decl = TYPE_NAME (*node);
6359 if (decl)
6361 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6362 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6363 *no_add_attrs = false;
6365 return NULL_TREE;
6367 #endif
6369 /* Return 0 if the attributes for two types are incompatible, 1 if they
6370 are compatible, and 2 if they are nearly compatible (which causes a
6371 warning to be generated). */
6372 static int
6373 arm_comp_type_attributes (const_tree type1, const_tree type2)
6375 int l1, l2, s1, s2;
6377 /* Check for mismatch of non-default calling convention. */
6378 if (TREE_CODE (type1) != FUNCTION_TYPE)
6379 return 1;
6381 /* Check for mismatched call attributes. */
6382 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6383 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6384 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6385 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6387 /* Only bother to check if an attribute is defined. */
6388 if (l1 | l2 | s1 | s2)
6390 /* If one type has an attribute, the other must have the same attribute. */
6391 if ((l1 != l2) || (s1 != s2))
6392 return 0;
6394 /* Disallow mixed attributes. */
6395 if ((l1 & s2) || (l2 & s1))
6396 return 0;
6399 /* Check for mismatched ISR attribute. */
6400 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6401 if (! l1)
6402 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6403 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6404 if (! l2)
6405 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6406 if (l1 != l2)
6407 return 0;
6409 return 1;
6412 /* Assigns default attributes to newly defined type. This is used to
6413 set short_call/long_call attributes for function types of
6414 functions defined inside corresponding #pragma scopes. */
6415 static void
6416 arm_set_default_type_attributes (tree type)
6418 /* Add __attribute__ ((long_call)) to all functions when inside
6419 #pragma long_calls, or __attribute__ ((short_call)) when inside
6420 #pragma no_long_calls. */
6421 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6423 tree type_attr_list, attr_name;
6424 type_attr_list = TYPE_ATTRIBUTES (type);
6426 if (arm_pragma_long_calls == LONG)
6427 attr_name = get_identifier ("long_call");
6428 else if (arm_pragma_long_calls == SHORT)
6429 attr_name = get_identifier ("short_call");
6430 else
6431 return;
6433 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6434 TYPE_ATTRIBUTES (type) = type_attr_list;
6438 /* Return true if DECL is known to be linked into section SECTION. */
6440 static bool
6441 arm_function_in_section_p (tree decl, section *section)
6443 /* We can only be certain about the prevailing symbol definition. */
6444 if (!decl_binds_to_current_def_p (decl))
6445 return false;
6447 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6448 if (!DECL_SECTION_NAME (decl))
6450 /* Make sure that we will not create a unique section for DECL. */
6451 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6452 return false;
6455 return function_section (decl) == section;
6458 /* Return nonzero if a 32-bit "long_call" should be generated for
6459 a call from the current function to DECL. We generate a long_call
6460 if the function:
6462 a. has an __attribute__ ((long_call))
6463 or b. is within the scope of a #pragma long_calls
6464 or c. the -mlong-calls command line switch has been specified
6466 However we do not generate a long call if the function:
6468 d. has an __attribute__ ((short_call))
6469 or e. is inside the scope of a #pragma no_long_calls
6470 or f. is defined in the same section as the current function. */
6472 bool
6473 arm_is_long_call_p (tree decl)
6475 tree attrs;
6477 if (!decl)
6478 return TARGET_LONG_CALLS;
6480 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6481 if (lookup_attribute ("short_call", attrs))
6482 return false;
6484 /* For "f", be conservative, and only cater for cases in which the
6485 whole of the current function is placed in the same section. */
6486 if (!flag_reorder_blocks_and_partition
6487 && TREE_CODE (decl) == FUNCTION_DECL
6488 && arm_function_in_section_p (decl, current_function_section ()))
6489 return false;
6491 if (lookup_attribute ("long_call", attrs))
6492 return true;
6494 return TARGET_LONG_CALLS;
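/* Illustration of the per-declaration attributes tested above
   (declarations hypothetical):

     extern void far_away (void) __attribute__ ((long_call));
     extern void nearby (void)   __attribute__ ((short_call));

   short_call always forces a short call; otherwise a callee known to
   live in the same section as the caller stays short, long_call forces
   a long call, and -mlong-calls sets the default for everything else.  */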
6497 /* Return nonzero if it is ok to make a tail-call to DECL. */
6498 static bool
6499 arm_function_ok_for_sibcall (tree decl, tree exp)
6501 unsigned long func_type;
6503 if (cfun->machine->sibcall_blocked)
6504 return false;
6506 /* Never tailcall something if we are generating code for Thumb-1. */
6507 if (TARGET_THUMB1)
6508 return false;
6510 /* The PIC register is live on entry to VxWorks PLT entries, so we
6511 must make the call before restoring the PIC register. */
6512 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6513 return false;
6515 /* If we are interworking and the function is not declared static
6516 then we can't tail-call it unless we know that it exists in this
6517 compilation unit (since it might be a Thumb routine). */
6518 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6519 && !TREE_ASM_WRITTEN (decl))
6520 return false;
6522 func_type = arm_current_func_type ();
6523 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6524 if (IS_INTERRUPT (func_type))
6525 return false;
6527 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6529 /* Check that the return value locations are the same. For
6530 example that we aren't returning a value from the sibling in
6531 a VFP register but then need to transfer it to a core
6532 register. */
6533 rtx a, b;
6535 a = arm_function_value (TREE_TYPE (exp), decl, false);
6536 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6537 cfun->decl, false);
6538 if (!rtx_equal_p (a, b))
6539 return false;
6542 /* Never tailcall if function may be called with a misaligned SP. */
6543 if (IS_STACKALIGN (func_type))
6544 return false;
6546 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6547 references should become a NOP. Don't convert such calls into
6548 sibling calls. */
6549 if (TARGET_AAPCS_BASED
6550 && arm_abi == ARM_ABI_AAPCS
6551 && decl
6552 && DECL_WEAK (decl))
6553 return false;
6555 /* Everything else is ok. */
6556 return true;
6560 /* Addressing mode support functions. */
6562 /* Return nonzero if X is a legitimate immediate operand when compiling
6563 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6564 int
6565 legitimate_pic_operand_p (rtx x)
6567 if (GET_CODE (x) == SYMBOL_REF
6568 || (GET_CODE (x) == CONST
6569 && GET_CODE (XEXP (x, 0)) == PLUS
6570 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6571 return 0;
6573 return 1;
6576 /* Record that the current function needs a PIC register. Initialize
6577 cfun->machine->pic_reg if we have not already done so. */
6579 static void
6580 require_pic_register (void)
6582 /* A lot of the logic here is made obscure by the fact that this
6583 routine gets called as part of the rtx cost estimation process.
6584 We don't want those calls to affect any assumptions about the real
6585 function; and further, we can't call entry_of_function() until we
6586 start the real expansion process. */
6587 if (!crtl->uses_pic_offset_table)
6589 gcc_assert (can_create_pseudo_p ());
6590 if (arm_pic_register != INVALID_REGNUM
6591 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6593 if (!cfun->machine->pic_reg)
6594 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6596 /* Play games to avoid marking the function as needing pic
6597 if we are being called as part of the cost-estimation
6598 process. */
6599 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6600 crtl->uses_pic_offset_table = 1;
6602 else
6604 rtx_insn *seq, *insn;
6606 if (!cfun->machine->pic_reg)
6607 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6609 /* Play games to avoid marking the function as needing pic
6610 if we are being called as part of the cost-estimation
6611 process. */
6612 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6614 crtl->uses_pic_offset_table = 1;
6615 start_sequence ();
6617 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6618 && arm_pic_register > LAST_LO_REGNUM)
6619 emit_move_insn (cfun->machine->pic_reg,
6620 gen_rtx_REG (Pmode, arm_pic_register));
6621 else
6622 arm_load_pic_register (0UL);
6624 seq = get_insns ();
6625 end_sequence ();
6627 for (insn = seq; insn; insn = NEXT_INSN (insn))
6628 if (INSN_P (insn))
6629 INSN_LOCATION (insn) = prologue_location;
6631 /* We can be called during expansion of PHI nodes, where
6632 we can't yet emit instructions directly in the final
6633 insn stream. Queue the insns on the entry edge, they will
6634 be committed after everything else is expanded. */
6635 insert_insn_on_edge (seq,
6636 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6642 rtx
6643 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6645 if (GET_CODE (orig) == SYMBOL_REF
6646 || GET_CODE (orig) == LABEL_REF)
6648 rtx insn;
6650 if (reg == 0)
6652 gcc_assert (can_create_pseudo_p ());
6653 reg = gen_reg_rtx (Pmode);
6656 /* VxWorks does not impose a fixed gap between segments; the run-time
6657 gap can be different from the object-file gap. We therefore can't
6658 use GOTOFF unless we are absolutely sure that the symbol is in the
6659 same segment as the GOT. Unfortunately, the flexibility of linker
6660 scripts means that we can't be sure of that in general, so assume
6661 that GOTOFF is never valid on VxWorks. */
6662 if ((GET_CODE (orig) == LABEL_REF
6663 || (GET_CODE (orig) == SYMBOL_REF &&
6664 SYMBOL_REF_LOCAL_P (orig)))
6665 && NEED_GOT_RELOC
6666 && arm_pic_data_is_text_relative)
6667 insn = arm_pic_static_addr (orig, reg);
6668 else
6670 rtx pat;
6671 rtx mem;
6673 /* If this function doesn't have a pic register, create one now. */
6674 require_pic_register ();
6676 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6678 /* Make the MEM as close to a constant as possible. */
6679 mem = SET_SRC (pat);
6680 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6681 MEM_READONLY_P (mem) = 1;
6682 MEM_NOTRAP_P (mem) = 1;
6684 insn = emit_insn (pat);
6687 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6688 by loop. */
6689 set_unique_reg_note (insn, REG_EQUAL, orig);
6691 return reg;
6693 else if (GET_CODE (orig) == CONST)
6695 rtx base, offset;
6697 if (GET_CODE (XEXP (orig, 0)) == PLUS
6698 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6699 return orig;
6701 /* Handle the case where we have: const (UNSPEC_TLS). */
6702 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6703 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6704 return orig;
6706 /* Handle the case where we have:
6707 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6708 CONST_INT. */
6709 if (GET_CODE (XEXP (orig, 0)) == PLUS
6710 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6711 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6713 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6714 return orig;
6717 if (reg == 0)
6719 gcc_assert (can_create_pseudo_p ());
6720 reg = gen_reg_rtx (Pmode);
6723 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6725 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6726 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6727 base == reg ? 0 : reg);
6729 if (CONST_INT_P (offset))
6731 /* The base register doesn't really matter; we only want to
6732 test the index for the appropriate mode. */
6733 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6735 gcc_assert (can_create_pseudo_p ());
6736 offset = force_reg (Pmode, offset);
6739 if (CONST_INT_P (offset))
6740 return plus_constant (Pmode, base, INTVAL (offset));
6743 if (GET_MODE_SIZE (mode) > 4
6744 && (GET_MODE_CLASS (mode) == MODE_INT
6745 || TARGET_SOFT_FLOAT))
6747 emit_insn (gen_addsi3 (reg, base, offset));
6748 return reg;
6751 return gen_rtx_PLUS (Pmode, base, offset);
6754 return orig;
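/* Rough picture of the result (assembly sketch, labels hypothetical):
   for a global symbol "x" the code above loads the address from the
   GOT through the PIC register,

       ldr     r3, .Lgot_off          @ .Lgot_off: .word x(GOT)
       ldr     r3, [rPIC, r3]         @ address of x

   whereas local symbols and labels (when arm_pic_data_is_text_relative)
   are reached PC-relatively via arm_pic_static_addr and need no GOT
   load at all.  */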
6758 /* Find a spare register to use during the prolog of a function. */
6760 static int
6761 thumb_find_work_register (unsigned long pushed_regs_mask)
6763 int reg;
6765 /* Check the argument registers first as these are call-used. The
6766 register allocation order means that sometimes r3 might be used
6767 but earlier argument registers might not, so check them all. */
6768 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6769 if (!df_regs_ever_live_p (reg))
6770 return reg;
6772 /* Before going on to check the call-saved registers we can try a couple
6773 more ways of deducing that r3 is available. The first is when we are
6774 pushing anonymous arguments onto the stack and we have less than 4
6775 registers worth of fixed arguments(*). In this case r3 will be part of
6776 the variable argument list and so we can be sure that it will be
6777 pushed right at the start of the function. Hence it will be available
6778 for the rest of the prologue.
6779 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6780 if (cfun->machine->uses_anonymous_args
6781 && crtl->args.pretend_args_size > 0)
6782 return LAST_ARG_REGNUM;
6784 /* The other case is when we have fixed arguments but less than 4 registers
6785 worth. In this case r3 might be used in the body of the function, but
6786 it is not being used to convey an argument into the function. In theory
6787 we could just check crtl->args.size to see how many bytes are
6788 being passed in argument registers, but it seems that it is unreliable.
6789 Sometimes it will have the value 0 when in fact arguments are being
6790 passed. (See testcase execute/20021111-1.c for an example). So we also
6791 check the args_info.nregs field as well. The problem with this field is
6792 that it makes no allowances for arguments that are passed to the
6793 function but which are not used. Hence we could miss an opportunity
6794 when a function has an unused argument in r3. But it is better to be
6795 safe than to be sorry. */
6796 if (! cfun->machine->uses_anonymous_args
6797 && crtl->args.size >= 0
6798 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6799 && (TARGET_AAPCS_BASED
6800 ? crtl->args.info.aapcs_ncrn < 4
6801 : crtl->args.info.nregs < 4))
6802 return LAST_ARG_REGNUM;
6804 /* Otherwise look for a call-saved register that is going to be pushed. */
6805 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6806 if (pushed_regs_mask & (1 << reg))
6807 return reg;
6809 if (TARGET_THUMB2)
6811 /* Thumb-2 can use high regs. */
6812 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6813 if (pushed_regs_mask & (1 << reg))
6814 return reg;
6816 /* Something went wrong - thumb_compute_save_reg_mask()
6817 should have arranged for a suitable register to be pushed. */
6818 gcc_unreachable ();
6821 static GTY(()) int pic_labelno;
6823 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6824 low register. */
6826 void
6827 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6829 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6831 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6832 return;
6834 gcc_assert (flag_pic);
6836 pic_reg = cfun->machine->pic_reg;
6837 if (TARGET_VXWORKS_RTP)
6839 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6840 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6841 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6843 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6845 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6846 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6848 else
6850 /* We use an UNSPEC rather than a LABEL_REF because this label
6851 never appears in the code stream. */
6853 labelno = GEN_INT (pic_labelno++);
6854 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6855 l1 = gen_rtx_CONST (VOIDmode, l1);
6857 /* On the ARM the PC register contains 'dot + 8' at the time of the
6858 addition, on the Thumb it is 'dot + 4'. */
6859 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6860 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6861 UNSPEC_GOTSYM_OFF);
6862 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6864 if (TARGET_32BIT)
6866 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6868 else /* TARGET_THUMB1 */
6870 if (arm_pic_register != INVALID_REGNUM
6871 && REGNO (pic_reg) > LAST_LO_REGNUM)
6873 /* We will have pushed the pic register, so we should always be
6874 able to find a work register. */
6875 pic_tmp = gen_rtx_REG (SImode,
6876 thumb_find_work_register (saved_regs));
6877 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6878 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6879 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6881 else if (arm_pic_register != INVALID_REGNUM
6882 && arm_pic_register > LAST_LO_REGNUM
6883 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6885 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6886 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6887 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6889 else
6890 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6894 /* Need to emit this whether or not we obey regdecls,
6895 since setjmp/longjmp can cause life info to screw up. */
6896 emit_use (pic_reg);
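/* For the common 32-bit, non-VxWorks case the sequence emitted above
   boils down to the classic PIC base setup (assembly sketch, labels
   hypothetical):

       ldr     rPIC, .LPIC_OFF
   .LPIC0:
       add     rPIC, pc, rPIC         @ pc reads as .LPIC0 + 8 in ARM state
       ...
   .LPIC_OFF:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   which is why the offset folded into the UNSPEC is 'dot + 8' for ARM
   and 'dot + 4' for Thumb.  */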
6899 /* Generate code to load the address of a static var when flag_pic is set. */
6900 static rtx
6901 arm_pic_static_addr (rtx orig, rtx reg)
6903 rtx l1, labelno, offset_rtx, insn;
6905 gcc_assert (flag_pic);
6907 /* We use an UNSPEC rather than a LABEL_REF because this label
6908 never appears in the code stream. */
6909 labelno = GEN_INT (pic_labelno++);
6910 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6911 l1 = gen_rtx_CONST (VOIDmode, l1);
6913 /* On the ARM the PC register contains 'dot + 8' at the time of the
6914 addition, on the Thumb it is 'dot + 4'. */
6915 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6916 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6917 UNSPEC_SYMBOL_OFFSET);
6918 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6920 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6921 return insn;
6924 /* Return nonzero if X is valid as an ARM state addressing register. */
6925 static int
6926 arm_address_register_rtx_p (rtx x, int strict_p)
6928 int regno;
6930 if (!REG_P (x))
6931 return 0;
6933 regno = REGNO (x);
6935 if (strict_p)
6936 return ARM_REGNO_OK_FOR_BASE_P (regno);
6938 return (regno <= LAST_ARM_REGNUM
6939 || regno >= FIRST_PSEUDO_REGISTER
6940 || regno == FRAME_POINTER_REGNUM
6941 || regno == ARG_POINTER_REGNUM);
6944 /* Return TRUE if this rtx is the difference of a symbol and a label,
6945 and will reduce to a PC-relative relocation in the object file.
6946 Expressions like this can be left alone when generating PIC, rather
6947 than forced through the GOT. */
6948 static int
6949 pcrel_constant_p (rtx x)
6951 if (GET_CODE (x) == MINUS)
6952 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6954 return FALSE;
6957 /* Return true if X will surely end up in an index register after next
6958 splitting pass. */
6959 static bool
6960 will_be_in_index_register (const_rtx x)
6962 /* arm.md: calculate_pic_address will split this into a register. */
6963 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6966 /* Return nonzero if X is a valid ARM state address operand. */
6967 int
6968 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6969 int strict_p)
6971 bool use_ldrd;
6972 enum rtx_code code = GET_CODE (x);
6974 if (arm_address_register_rtx_p (x, strict_p))
6975 return 1;
6977 use_ldrd = (TARGET_LDRD
6978 && (mode == DImode
6979 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6981 if (code == POST_INC || code == PRE_DEC
6982 || ((code == PRE_INC || code == POST_DEC)
6983 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6984 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6986 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6987 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6988 && GET_CODE (XEXP (x, 1)) == PLUS
6989 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6991 rtx addend = XEXP (XEXP (x, 1), 1);
6993 /* Don't allow ldrd post increment by register because it's hard
6994 to fixup invalid register choices. */
6995 if (use_ldrd
6996 && GET_CODE (x) == POST_MODIFY
6997 && REG_P (addend))
6998 return 0;
7000 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7001 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7004 /* After reload constants split into minipools will have addresses
7005 from a LABEL_REF. */
7006 else if (reload_completed
7007 && (code == LABEL_REF
7008 || (code == CONST
7009 && GET_CODE (XEXP (x, 0)) == PLUS
7010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7012 return 1;
7014 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7015 return 0;
7017 else if (code == PLUS)
7019 rtx xop0 = XEXP (x, 0);
7020 rtx xop1 = XEXP (x, 1);
7022 return ((arm_address_register_rtx_p (xop0, strict_p)
7023 && ((CONST_INT_P (xop1)
7024 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7025 || (!strict_p && will_be_in_index_register (xop1))))
7026 || (arm_address_register_rtx_p (xop1, strict_p)
7027 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7030 #if 0
7031 /* Reload currently can't handle MINUS, so disable this for now */
7032 else if (GET_CODE (x) == MINUS)
7034 rtx xop0 = XEXP (x, 0);
7035 rtx xop1 = XEXP (x, 1);
7037 return (arm_address_register_rtx_p (xop0, strict_p)
7038 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7040 #endif
7042 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7043 && code == SYMBOL_REF
7044 && CONSTANT_POOL_ADDRESS_P (x)
7045 && ! (flag_pic
7046 && symbol_mentioned_p (get_pool_constant (x))
7047 && ! pcrel_constant_p (get_pool_constant (x))))
7048 return 1;
7050 return 0;
7053 /* Return nonzero if X is a valid Thumb-2 address operand. */
7054 static int
7055 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7057 bool use_ldrd;
7058 enum rtx_code code = GET_CODE (x);
7060 if (arm_address_register_rtx_p (x, strict_p))
7061 return 1;
7063 use_ldrd = (TARGET_LDRD
7064 && (mode == DImode
7065 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7067 if (code == POST_INC || code == PRE_DEC
7068 || ((code == PRE_INC || code == POST_DEC)
7069 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7070 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7072 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7073 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7074 && GET_CODE (XEXP (x, 1)) == PLUS
7075 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7077 /* Thumb-2 only has autoincrement by constant. */
7078 rtx addend = XEXP (XEXP (x, 1), 1);
7079 HOST_WIDE_INT offset;
7081 if (!CONST_INT_P (addend))
7082 return 0;
7084 offset = INTVAL(addend);
7085 if (GET_MODE_SIZE (mode) <= 4)
7086 return (offset > -256 && offset < 256);
7088 return (use_ldrd && offset > -1024 && offset < 1024
7089 && (offset & 3) == 0);
7092 /* After reload constants split into minipools will have addresses
7093 from a LABEL_REF. */
7094 else if (reload_completed
7095 && (code == LABEL_REF
7096 || (code == CONST
7097 && GET_CODE (XEXP (x, 0)) == PLUS
7098 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7099 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7100 return 1;
7102 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7103 return 0;
7105 else if (code == PLUS)
7107 rtx xop0 = XEXP (x, 0);
7108 rtx xop1 = XEXP (x, 1);
7110 return ((arm_address_register_rtx_p (xop0, strict_p)
7111 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7112 || (!strict_p && will_be_in_index_register (xop1))))
7113 || (arm_address_register_rtx_p (xop1, strict_p)
7114 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7117 /* Normally we can assign constant values to target registers without
7118 the help of the constant pool.  But there are cases where we have to use
7119 the constant pool, for example:
7120 1) assigning a label to a register;
7121 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7123 A constant pool access of the form:
7124 (set (reg r0) (mem (symbol_ref (".LC0"))))
7125 will cause the use of the literal pool (later, in function arm_reorg).
7126 So here we mark such a form as invalid; the compiler will then adjust it
7127 into:
7128 (set (reg r0) (symbol_ref (".LC0")))
7129 (set (reg r0) (mem (reg r0))).
7130 No extra register is required, and (mem (reg r0)) won't cause the use
7131 of literal pools. */
7132 else if (arm_disable_literal_pool && code == SYMBOL_REF
7133 && CONSTANT_POOL_ADDRESS_P (x))
7134 return 0;
7136 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7137 && code == SYMBOL_REF
7138 && CONSTANT_POOL_ADDRESS_P (x)
7139 && ! (flag_pic
7140 && symbol_mentioned_p (get_pool_constant (x))
7141 && ! pcrel_constant_p (get_pool_constant (x))))
7142 return 1;
7144 return 0;
7147 /* Return nonzero if INDEX is valid for an address index operand in
7148 ARM state. */
7149 static int
7150 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7151 int strict_p)
7153 HOST_WIDE_INT range;
7154 enum rtx_code code = GET_CODE (index);
7156 /* Standard coprocessor addressing modes. */
7157 if (TARGET_HARD_FLOAT
7158 && TARGET_VFP
7159 && (mode == SFmode || mode == DFmode))
7160 return (code == CONST_INT && INTVAL (index) < 1024
7161 && INTVAL (index) > -1024
7162 && (INTVAL (index) & 3) == 0);
7164 /* For quad modes, we restrict the constant offset to be slightly less
7165 than what the instruction format permits. We do this because for
7166 quad mode moves, we will actually decompose them into two separate
7167 double-mode reads or writes. INDEX must therefore be a valid
7168 (double-mode) offset and so should INDEX+8. */
7169 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7170 return (code == CONST_INT
7171 && INTVAL (index) < 1016
7172 && INTVAL (index) > -1024
7173 && (INTVAL (index) & 3) == 0);
7175 /* We have no such constraint on double mode offsets, so we permit the
7176 full range of the instruction format. */
7177 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7178 return (code == CONST_INT
7179 && INTVAL (index) < 1024
7180 && INTVAL (index) > -1024
7181 && (INTVAL (index) & 3) == 0);
7183 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7184 return (code == CONST_INT
7185 && INTVAL (index) < 1024
7186 && INTVAL (index) > -1024
7187 && (INTVAL (index) & 3) == 0);
7189 if (arm_address_register_rtx_p (index, strict_p)
7190 && (GET_MODE_SIZE (mode) <= 4))
7191 return 1;
7193 if (mode == DImode || mode == DFmode)
7195 if (code == CONST_INT)
7197 HOST_WIDE_INT val = INTVAL (index);
7199 if (TARGET_LDRD)
7200 return val > -256 && val < 256;
7201 else
7202 return val > -4096 && val < 4092;
7205 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7208 if (GET_MODE_SIZE (mode) <= 4
7209 && ! (arm_arch4
7210 && (mode == HImode
7211 || mode == HFmode
7212 || (mode == QImode && outer == SIGN_EXTEND))))
7214 if (code == MULT)
7216 rtx xiop0 = XEXP (index, 0);
7217 rtx xiop1 = XEXP (index, 1);
7219 return ((arm_address_register_rtx_p (xiop0, strict_p)
7220 && power_of_two_operand (xiop1, SImode))
7221 || (arm_address_register_rtx_p (xiop1, strict_p)
7222 && power_of_two_operand (xiop0, SImode)));
7224 else if (code == LSHIFTRT || code == ASHIFTRT
7225 || code == ASHIFT || code == ROTATERT)
7227 rtx op = XEXP (index, 1);
7229 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7230 && CONST_INT_P (op)
7231 && INTVAL (op) > 0
7232 && INTVAL (op) <= 31);
7236 /* For ARM v4 we may be doing a sign-extend operation during the
7237 load. */
7238 if (arm_arch4)
7240 if (mode == HImode
7241 || mode == HFmode
7242 || (outer == SIGN_EXTEND && mode == QImode))
7243 range = 256;
7244 else
7245 range = 4096;
7247 else
7248 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7250 return (code == CONST_INT
7251 && INTVAL (index) < range
7252 && INTVAL (index) > -range);
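/* Examples of addresses accepted above for a word load on ARMv4 and
   later (operands illustrative):

     ldr   r0, [r1, #4092]         immediate in (-4096, 4096)
     ldr   r0, [r1, r2]            register index
     ldr   r0, [r1, r2, lsl #3]    index scaled by a power of two
     ldrh  r0, [r1, #254]          halfword: immediate in (-256, 256)
     ldrd  r4, r5, [r1, #248]      DImode with LDRD: immediate in (-256, 256)

   VFP loads were already restricted above to word-aligned offsets in
   (-1024, 1024), and NEON quad modes to (-1024, 1016).  */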
7255 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7256 index operand, i.e. 1, 2, 4 or 8. */
7257 static bool
7258 thumb2_index_mul_operand (rtx op)
7260 HOST_WIDE_INT val;
7262 if (!CONST_INT_P (op))
7263 return false;
7265 val = INTVAL(op);
7266 return (val == 1 || val == 2 || val == 4 || val == 8);
7269 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7270 static int
7271 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7273 enum rtx_code code = GET_CODE (index);
7275 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7276 /* Standard coprocessor addressing modes. */
7277 if (TARGET_HARD_FLOAT
7278 && TARGET_VFP
7279 && (mode == SFmode || mode == DFmode))
7280 return (code == CONST_INT && INTVAL (index) < 1024
7281 /* Thumb-2 allows only > -256 index range for its core register
7282 load/stores. Since we allow SF/DF in core registers, we have
7283 to use the intersection between -256~4096 (core) and -1024~1024
7284 (coprocessor). */
7285 && INTVAL (index) > -256
7286 && (INTVAL (index) & 3) == 0);
7288 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7290 /* For DImode assume values will usually live in core regs
7291 and only allow LDRD addressing modes. */
7292 if (!TARGET_LDRD || mode != DImode)
7293 return (code == CONST_INT
7294 && INTVAL (index) < 1024
7295 && INTVAL (index) > -1024
7296 && (INTVAL (index) & 3) == 0);
7299 /* For quad modes, we restrict the constant offset to be slightly less
7300 than what the instruction format permits. We do this because for
7301 quad mode moves, we will actually decompose them into two separate
7302 double-mode reads or writes. INDEX must therefore be a valid
7303 (double-mode) offset and so should INDEX+8. */
7304 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7305 return (code == CONST_INT
7306 && INTVAL (index) < 1016
7307 && INTVAL (index) > -1024
7308 && (INTVAL (index) & 3) == 0);
7310 /* We have no such constraint on double mode offsets, so we permit the
7311 full range of the instruction format. */
7312 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7313 return (code == CONST_INT
7314 && INTVAL (index) < 1024
7315 && INTVAL (index) > -1024
7316 && (INTVAL (index) & 3) == 0);
7318 if (arm_address_register_rtx_p (index, strict_p)
7319 && (GET_MODE_SIZE (mode) <= 4))
7320 return 1;
7322 if (mode == DImode || mode == DFmode)
7324 if (code == CONST_INT)
7326 HOST_WIDE_INT val = INTVAL (index);
7327 /* ??? Can we assume ldrd for thumb2? */
7328 /* Thumb-2 ldrd only has reg+const addressing modes. */
7329 /* ldrd supports offsets of +-1020.
7330 However the ldr fallback does not. */
7331 return val > -256 && val < 256 && (val & 3) == 0;
7333 else
7334 return 0;
7337 if (code == MULT)
7339 rtx xiop0 = XEXP (index, 0);
7340 rtx xiop1 = XEXP (index, 1);
7342 return ((arm_address_register_rtx_p (xiop0, strict_p)
7343 && thumb2_index_mul_operand (xiop1))
7344 || (arm_address_register_rtx_p (xiop1, strict_p)
7345 && thumb2_index_mul_operand (xiop0)));
7347 else if (code == ASHIFT)
7349 rtx op = XEXP (index, 1);
7351 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7352 && CONST_INT_P (op)
7353 && INTVAL (op) > 0
7354 && INTVAL (op) <= 3);
7357 return (code == CONST_INT
7358 && INTVAL (index) < 4096
7359 && INTVAL (index) > -256);
7362 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7363 static int
7364 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7366 int regno;
7368 if (!REG_P (x))
7369 return 0;
7371 regno = REGNO (x);
7373 if (strict_p)
7374 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7376 return (regno <= LAST_LO_REGNUM
7377 || regno > LAST_VIRTUAL_REGISTER
7378 || regno == FRAME_POINTER_REGNUM
7379 || (GET_MODE_SIZE (mode) >= 4
7380 && (regno == STACK_POINTER_REGNUM
7381 || regno >= FIRST_PSEUDO_REGISTER
7382 || x == hard_frame_pointer_rtx
7383 || x == arg_pointer_rtx)));
7386 /* Return nonzero if x is a legitimate index register. This is the case
7387 for any base register that can access a QImode object. */
7388 inline static int
7389 thumb1_index_register_rtx_p (rtx x, int strict_p)
7391 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7394 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7396 The AP may be eliminated to either the SP or the FP, so we use the
7397 least common denominator, e.g. SImode, and offsets from 0 to 64.
7399 ??? Verify whether the above is the right approach.
7401 ??? Also, the FP may be eliminated to the SP, so perhaps that
7402 needs special handling also.
7404 ??? Look at how the mips16 port solves this problem. It probably uses
7405 better ways to solve some of these problems.
7407 Although it is not incorrect, we don't accept QImode and HImode
7408 addresses based on the frame pointer or arg pointer until the
7409 reload pass starts. This is so that eliminating such addresses
7410 into stack based ones won't produce impossible code. */
7411 int
7412 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7414 /* ??? Not clear if this is right. Experiment. */
7415 if (GET_MODE_SIZE (mode) < 4
7416 && !(reload_in_progress || reload_completed)
7417 && (reg_mentioned_p (frame_pointer_rtx, x)
7418 || reg_mentioned_p (arg_pointer_rtx, x)
7419 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7420 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7421 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7422 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7423 return 0;
7425 /* Accept any base register. SP only in SImode or larger. */
7426 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7427 return 1;
7429 /* This is PC relative data before arm_reorg runs. */
7430 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7431 && GET_CODE (x) == SYMBOL_REF
7432 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7433 return 1;
7435 /* This is PC relative data after arm_reorg runs. */
7436 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7437 && reload_completed
7438 && (GET_CODE (x) == LABEL_REF
7439 || (GET_CODE (x) == CONST
7440 && GET_CODE (XEXP (x, 0)) == PLUS
7441 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7442 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7443 return 1;
7445 /* Post-inc indexing only supported for SImode and larger. */
7446 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7447 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7448 return 1;
7450 else if (GET_CODE (x) == PLUS)
7452 /* REG+REG address can be any two index registers. */
7453 /* We disallow FRAME+REG addressing since we know that FRAME
7454 will be replaced with STACK, and SP relative addressing only
7455 permits SP+OFFSET. */
7456 if (GET_MODE_SIZE (mode) <= 4
7457 && XEXP (x, 0) != frame_pointer_rtx
7458 && XEXP (x, 1) != frame_pointer_rtx
7459 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7460 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7461 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7462 return 1;
7464 /* REG+const has 5-7 bit offset for non-SP registers. */
7465 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7466 || XEXP (x, 0) == arg_pointer_rtx)
7467 && CONST_INT_P (XEXP (x, 1))
7468 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7469 return 1;
7471 /* REG+const has 10-bit offset for SP, but only SImode and
7472 larger are supported. */
7473 /* ??? Should probably check for DI/DFmode overflow here
7474 just like GO_IF_LEGITIMATE_OFFSET does. */
7475 else if (REG_P (XEXP (x, 0))
7476 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7477 && GET_MODE_SIZE (mode) >= 4
7478 && CONST_INT_P (XEXP (x, 1))
7479 && INTVAL (XEXP (x, 1)) >= 0
7480 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7481 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7482 return 1;
7484 else if (REG_P (XEXP (x, 0))
7485 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7486 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7487 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7488 && REGNO (XEXP (x, 0))
7489 <= LAST_VIRTUAL_POINTER_REGISTER))
7490 && GET_MODE_SIZE (mode) >= 4
7491 && CONST_INT_P (XEXP (x, 1))
7492 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7493 return 1;
7496 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7497 && GET_MODE_SIZE (mode) == 4
7498 && GET_CODE (x) == SYMBOL_REF
7499 && CONSTANT_POOL_ADDRESS_P (x)
7500 && ! (flag_pic
7501 && symbol_mentioned_p (get_pool_constant (x))
7502 && ! pcrel_constant_p (get_pool_constant (x))))
7503 return 1;
7505 return 0;
7508 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7509 instruction of mode MODE. */
7510 int
7511 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7513 switch (GET_MODE_SIZE (mode))
7515 case 1:
7516 return val >= 0 && val < 32;
7518 case 2:
7519 return val >= 0 && val < 64 && (val & 1) == 0;
7521 default:
7522 return (val >= 0
7523 && (val + GET_MODE_SIZE (mode)) <= 128
7524 && (val & 3) == 0);
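/* Illustrative summary of the ranges accepted above (derived from the checks,
   not part of the original source): byte accesses allow offsets 0..31,
   halfword accesses allow even offsets 0..62, and word or larger accesses
   allow multiple-of-4 offsets such that the whole access stays within 128
   bytes, e.g. 0..124 for SImode.  */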
7528 bool
7529 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7531 if (TARGET_ARM)
7532 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7533 else if (TARGET_THUMB2)
7534 return thumb2_legitimate_address_p (mode, x, strict_p);
7535 else /* if (TARGET_THUMB1) */
7536 return thumb1_legitimate_address_p (mode, x, strict_p);
7539 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7541 Given an rtx X being reloaded into a reg required to be
7542 in class CLASS, return the class of reg to actually use.
7543 In general this is just CLASS, but for the Thumb core registers and
7544 immediate constants we prefer a LO_REGS class or a subset. */
7546 static reg_class_t
7547 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7549 if (TARGET_32BIT)
7550 return rclass;
7551 else
7553 if (rclass == GENERAL_REGS)
7554 return LO_REGS;
7555 else
7556 return rclass;
7560 /* Build the SYMBOL_REF for __tls_get_addr. */
7562 static GTY(()) rtx tls_get_addr_libfunc;
7564 static rtx
7565 get_tls_get_addr (void)
7567 if (!tls_get_addr_libfunc)
7568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7569 return tls_get_addr_libfunc;
7572 rtx
7573 arm_load_tp (rtx target)
7575 if (!target)
7576 target = gen_reg_rtx (SImode);
7578 if (TARGET_HARD_TP)
7580 /* Can return in any reg. */
7581 emit_insn (gen_load_tp_hard (target));
7583 else
7585 /* Always returned in r0. Immediately copy the result into a pseudo,
7586 otherwise other uses of r0 (e.g. setting up function arguments) may
7587 clobber the value. */
7589 rtx tmp;
7591 emit_insn (gen_load_tp_soft ());
7593 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7594 emit_move_insn (target, tmp);
7596 return target;
7599 static rtx
7600 load_tls_operand (rtx x, rtx reg)
7602 rtx tmp;
7604 if (reg == NULL_RTX)
7605 reg = gen_reg_rtx (SImode);
7607 tmp = gen_rtx_CONST (SImode, x);
7609 emit_move_insn (reg, tmp);
7611 return reg;
7614 static rtx
7615 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7617 rtx insns, label, labelno, sum;
7619 gcc_assert (reloc != TLS_DESCSEQ);
7620 start_sequence ();
7622 labelno = GEN_INT (pic_labelno++);
7623 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7624 label = gen_rtx_CONST (VOIDmode, label);
7626 sum = gen_rtx_UNSPEC (Pmode,
7627 gen_rtvec (4, x, GEN_INT (reloc), label,
7628 GEN_INT (TARGET_ARM ? 8 : 4)),
7629 UNSPEC_TLS);
7630 reg = load_tls_operand (sum, reg);
7632 if (TARGET_ARM)
7633 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7634 else
7635 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7637 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7638 LCT_PURE, /* LCT_CONST? */
7639 Pmode, 1, reg, Pmode);
7641 insns = get_insns ();
7642 end_sequence ();
7644 return insns;
7647 static rtx
7648 arm_tls_descseq_addr (rtx x, rtx reg)
7650 rtx labelno = GEN_INT (pic_labelno++);
7651 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7652 rtx sum = gen_rtx_UNSPEC (Pmode,
7653 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7654 gen_rtx_CONST (VOIDmode, label),
7655 GEN_INT (!TARGET_ARM)),
7656 UNSPEC_TLS);
7657 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7659 emit_insn (gen_tlscall (x, labelno));
7660 if (!reg)
7661 reg = gen_reg_rtx (SImode);
7662 else
7663 gcc_assert (REGNO (reg) != R0_REGNUM);
7665 emit_move_insn (reg, reg0);
7667 return reg;
7670 rtx
7671 legitimize_tls_address (rtx x, rtx reg)
7673 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7674 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7676 switch (model)
7678 case TLS_MODEL_GLOBAL_DYNAMIC:
7679 if (TARGET_GNU2_TLS)
7681 reg = arm_tls_descseq_addr (x, reg);
7683 tp = arm_load_tp (NULL_RTX);
7685 dest = gen_rtx_PLUS (Pmode, tp, reg);
7687 else
7689 /* Original scheme */
7690 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7691 dest = gen_reg_rtx (Pmode);
7692 emit_libcall_block (insns, dest, ret, x);
7694 return dest;
7696 case TLS_MODEL_LOCAL_DYNAMIC:
7697 if (TARGET_GNU2_TLS)
7699 reg = arm_tls_descseq_addr (x, reg);
7701 tp = arm_load_tp (NULL_RTX);
7703 dest = gen_rtx_PLUS (Pmode, tp, reg);
7705 else
7707 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7709 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7710 share the LDM result with other LD model accesses. */
7711 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7712 UNSPEC_TLS);
7713 dest = gen_reg_rtx (Pmode);
7714 emit_libcall_block (insns, dest, ret, eqv);
7716 /* Load the addend. */
7717 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7718 GEN_INT (TLS_LDO32)),
7719 UNSPEC_TLS);
7720 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7721 dest = gen_rtx_PLUS (Pmode, dest, addend);
7723 return dest;
7725 case TLS_MODEL_INITIAL_EXEC:
7726 labelno = GEN_INT (pic_labelno++);
7727 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7728 label = gen_rtx_CONST (VOIDmode, label);
7729 sum = gen_rtx_UNSPEC (Pmode,
7730 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7731 GEN_INT (TARGET_ARM ? 8 : 4)),
7732 UNSPEC_TLS);
7733 reg = load_tls_operand (sum, reg);
7735 if (TARGET_ARM)
7736 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7737 else if (TARGET_THUMB2)
7738 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7739 else
7741 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7742 emit_move_insn (reg, gen_const_mem (SImode, reg));
7745 tp = arm_load_tp (NULL_RTX);
7747 return gen_rtx_PLUS (Pmode, tp, reg);
7749 case TLS_MODEL_LOCAL_EXEC:
7750 tp = arm_load_tp (NULL_RTX);
7752 reg = gen_rtx_UNSPEC (Pmode,
7753 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7754 UNSPEC_TLS);
7755 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7757 return gen_rtx_PLUS (Pmode, tp, reg);
7759 default:
7760 abort ();
7764 /* Try machine-dependent ways of modifying an illegitimate address
7765 to be legitimate. If we find one, return the new, valid address. */
7766 rtx
7767 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7769 if (arm_tls_referenced_p (x))
7771 rtx addend = NULL;
7773 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7775 addend = XEXP (XEXP (x, 0), 1);
7776 x = XEXP (XEXP (x, 0), 0);
7779 if (GET_CODE (x) != SYMBOL_REF)
7780 return x;
7782 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7784 x = legitimize_tls_address (x, NULL_RTX);
7786 if (addend)
7788 x = gen_rtx_PLUS (SImode, x, addend);
7789 orig_x = x;
7791 else
7792 return x;
7795 if (!TARGET_ARM)
7797 /* TODO: legitimize_address for Thumb2. */
7798 if (TARGET_THUMB2)
7799 return x;
7800 return thumb_legitimize_address (x, orig_x, mode);
7803 if (GET_CODE (x) == PLUS)
7805 rtx xop0 = XEXP (x, 0);
7806 rtx xop1 = XEXP (x, 1);
7808 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7809 xop0 = force_reg (SImode, xop0);
7811 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7812 && !symbol_mentioned_p (xop1))
7813 xop1 = force_reg (SImode, xop1);
7815 if (ARM_BASE_REGISTER_RTX_P (xop0)
7816 && CONST_INT_P (xop1))
7818 HOST_WIDE_INT n, low_n;
7819 rtx base_reg, val;
7820 n = INTVAL (xop1);
7822 /* VFP addressing modes actually allow greater offsets, but for
7823 now we just stick with the lowest common denominator. */
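/* Worked example of the split below (illustrative only): for a DImode access
   at xop0 + 27, low_n starts as 11 and n as 16; since low_n > 4 the code
   rebalances to n = 32 and low_n = -5, so the address is formed as
   (xop0 + 32) - 5, keeping the final offset small enough for the most
   restrictive addressing mode.  */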
7824 if (mode == DImode
7825 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7827 low_n = n & 0x0f;
7828 n &= ~0x0f;
7829 if (low_n > 4)
7831 n += 16;
7832 low_n -= 16;
7835 else
7837 low_n = ((mode) == TImode ? 0
7838 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7839 n -= low_n;
7842 base_reg = gen_reg_rtx (SImode);
7843 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7844 emit_move_insn (base_reg, val);
7845 x = plus_constant (Pmode, base_reg, low_n);
7847 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7848 x = gen_rtx_PLUS (SImode, xop0, xop1);
7851 /* XXX We don't allow MINUS any more -- see comment in
7852 arm_legitimate_address_outer_p (). */
7853 else if (GET_CODE (x) == MINUS)
7855 rtx xop0 = XEXP (x, 0);
7856 rtx xop1 = XEXP (x, 1);
7858 if (CONSTANT_P (xop0))
7859 xop0 = force_reg (SImode, xop0);
7861 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7862 xop1 = force_reg (SImode, xop1);
7864 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7865 x = gen_rtx_MINUS (SImode, xop0, xop1);
7868 /* Make sure to take full advantage of the pre-indexed addressing mode
7869 with absolute addresses, which often allows the base register to be
7870 factored out across multiple adjacent memory references, and might
7871 even allow the minipool to be avoided entirely. */
7872 else if (CONST_INT_P (x) && optimize > 0)
7874 unsigned int bits;
7875 HOST_WIDE_INT mask, base, index;
7876 rtx base_reg;
7878 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7879 use an 8-bit index. So let's use a 12-bit index for SImode only and
7880 hope that arm_gen_constant will enable ldrb to use more bits. */
7881 bits = (mode == SImode) ? 12 : 8;
7882 mask = (1 << bits) - 1;
7883 base = INTVAL (x) & ~mask;
7884 index = INTVAL (x) & mask;
7885 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7887 /* It'll most probably be more efficient to generate the base
7888 with more bits set and use a negative index instead. */
7889 base |= mask;
7890 index -= mask;
7892 base_reg = force_reg (SImode, GEN_INT (base));
7893 x = plus_constant (Pmode, base_reg, index);
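/* Illustrative example of the split above (the value is hypothetical): for
   x == 0x00345678 in SImode, bits == 12 and mask == 0xfff, giving
   base == 0x00345000 and index == 0x678; the base is loaded into a register
   once and can then be shared by neighbouring accesses that only differ in
   the low 12 bits.  */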
7896 if (flag_pic)
7898 /* We need to find and carefully transform any SYMBOL and LABEL
7899 references, so go back to the original address expression. */
7900 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7902 if (new_x != orig_x)
7903 x = new_x;
7906 return x;
7910 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7911 to be legitimate. If we find one, return the new, valid address. */
7912 rtx
7913 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7915 if (GET_CODE (x) == PLUS
7916 && CONST_INT_P (XEXP (x, 1))
7917 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7918 || INTVAL (XEXP (x, 1)) < 0))
7920 rtx xop0 = XEXP (x, 0);
7921 rtx xop1 = XEXP (x, 1);
7922 HOST_WIDE_INT offset = INTVAL (xop1);
7924 /* Try and fold the offset into a biasing of the base register and
7925 then offsetting that. Don't do this when optimizing for space
7926 since it can cause too many CSEs. */
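/* Worked example of the rebiasing below (illustrative only): for an SImode
   access at xop0 + 300, delta becomes 300 - (256 - 4) = 48, so the base is
   biased by 252 and the final access uses offset 48, which fits the 5-bit
   scaled offset range of the load.  */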
7927 if (optimize_size && offset >= 0
7928 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7930 HOST_WIDE_INT delta;
7932 if (offset >= 256)
7933 delta = offset - (256 - GET_MODE_SIZE (mode));
7934 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7935 delta = 31 * GET_MODE_SIZE (mode);
7936 else
7937 delta = offset & (~31 * GET_MODE_SIZE (mode));
7939 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7940 NULL_RTX);
7941 x = plus_constant (Pmode, xop0, delta);
7943 else if (offset < 0 && offset > -256)
7944 /* Small negative offsets are best done with a subtract before the
7945 dereference, since forcing these into a register normally takes
7946 two instructions. */
7947 x = force_operand (x, NULL_RTX);
7948 else
7950 /* For the remaining cases, force the constant into a register. */
7951 xop1 = force_reg (SImode, xop1);
7952 x = gen_rtx_PLUS (SImode, xop0, xop1);
7955 else if (GET_CODE (x) == PLUS
7956 && s_register_operand (XEXP (x, 1), SImode)
7957 && !s_register_operand (XEXP (x, 0), SImode))
7959 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7961 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7964 if (flag_pic)
7966 /* We need to find and carefully transform any SYMBOL and LABEL
7967 references, so go back to the original address expression. */
7968 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7970 if (new_x != orig_x)
7971 x = new_x;
7974 return x;
7977 /* Return TRUE if X contains any TLS symbol references. */
7979 bool
7980 arm_tls_referenced_p (rtx x)
7982 if (! TARGET_HAVE_TLS)
7983 return false;
7985 subrtx_iterator::array_type array;
7986 FOR_EACH_SUBRTX (iter, array, x, ALL)
7988 const_rtx x = *iter;
7989 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
7990 return true;
7992 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7993 TLS offsets, not real symbol references. */
7994 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
7995 iter.skip_subrtxes ();
7997 return false;
8000 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8002 On the ARM, allow any integer (invalid ones are removed later by insn
8003 patterns), nice doubles and symbol_refs which refer to the function's
8004 constant pool XXX.
8006 When generating PIC, allow anything. */
8008 static bool
8009 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8011 return flag_pic || !label_mentioned_p (x);
8014 static bool
8015 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8017 return (CONST_INT_P (x)
8018 || CONST_DOUBLE_P (x)
8019 || CONSTANT_ADDRESS_P (x)
8020 || flag_pic);
8023 static bool
8024 arm_legitimate_constant_p (machine_mode mode, rtx x)
8026 return (!arm_cannot_force_const_mem (mode, x)
8027 && (TARGET_32BIT
8028 ? arm_legitimate_constant_p_1 (mode, x)
8029 : thumb_legitimate_constant_p (mode, x)));
8032 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8034 static bool
8035 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8037 rtx base, offset;
8039 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8041 split_const (x, &base, &offset);
8042 if (GET_CODE (base) == SYMBOL_REF
8043 && !offset_within_block_p (base, INTVAL (offset)))
8044 return true;
8046 return arm_tls_referenced_p (x);
8049 #define REG_OR_SUBREG_REG(X) \
8050 (REG_P (X) \
8051 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8053 #define REG_OR_SUBREG_RTX(X) \
8054 (REG_P (X) ? (X) : SUBREG_REG (X))
8056 static inline int
8057 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8059 machine_mode mode = GET_MODE (x);
8060 int total, words;
8062 switch (code)
8064 case ASHIFT:
8065 case ASHIFTRT:
8066 case LSHIFTRT:
8067 case ROTATERT:
8068 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8070 case PLUS:
8071 case MINUS:
8072 case COMPARE:
8073 case NEG:
8074 case NOT:
8075 return COSTS_N_INSNS (1);
8077 case MULT:
8078 if (CONST_INT_P (XEXP (x, 1)))
8080 int cycles = 0;
8081 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8083 while (i)
8085 i >>= 2;
8086 cycles++;
8088 return COSTS_N_INSNS (2) + cycles;
8090 return COSTS_N_INSNS (1) + 16;
8092 case SET:
8093 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8094 the mode. */
8095 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8096 return (COSTS_N_INSNS (words)
8097 + 4 * ((MEM_P (SET_SRC (x)))
8098 + MEM_P (SET_DEST (x))));
8100 case CONST_INT:
8101 if (outer == SET)
8103 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8104 return 0;
8105 if (thumb_shiftable_const (INTVAL (x)))
8106 return COSTS_N_INSNS (2);
8107 return COSTS_N_INSNS (3);
8109 else if ((outer == PLUS || outer == COMPARE)
8110 && INTVAL (x) < 256 && INTVAL (x) > -256)
8111 return 0;
8112 else if ((outer == IOR || outer == XOR || outer == AND)
8113 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8114 return COSTS_N_INSNS (1);
8115 else if (outer == AND)
8117 int i;
8118 /* This duplicates the tests in the andsi3 expander. */
8119 for (i = 9; i <= 31; i++)
8120 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8121 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8122 return COSTS_N_INSNS (2);
8124 else if (outer == ASHIFT || outer == ASHIFTRT
8125 || outer == LSHIFTRT)
8126 return 0;
8127 return COSTS_N_INSNS (2);
8129 case CONST:
8130 case CONST_DOUBLE:
8131 case LABEL_REF:
8132 case SYMBOL_REF:
8133 return COSTS_N_INSNS (3);
8135 case UDIV:
8136 case UMOD:
8137 case DIV:
8138 case MOD:
8139 return 100;
8141 case TRUNCATE:
8142 return 99;
8144 case AND:
8145 case XOR:
8146 case IOR:
8147 /* XXX guess. */
8148 return 8;
8150 case MEM:
8151 /* XXX another guess. */
8152 /* Memory costs quite a lot for the first word, but subsequent words
8153 load at the equivalent of a single insn each. */
8154 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8155 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8156 ? 4 : 0));
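/* Illustrative reading of the formula above (not in the original source):
   with 4-byte words, an SImode load costs 10, a DImode load costs 10 + 4 = 14,
   and either costs 4 more if it is a constant-pool reference.  */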
8158 case IF_THEN_ELSE:
8159 /* XXX a guess. */
8160 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8161 return 14;
8162 return 2;
8164 case SIGN_EXTEND:
8165 case ZERO_EXTEND:
8166 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8167 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8169 if (mode == SImode)
8170 return total;
8172 if (arm_arch6)
8173 return total + COSTS_N_INSNS (1);
8175 /* Assume a two-shift sequence. Increase the cost slightly so
8176 we prefer actual shifts over an extend operation. */
8177 return total + 1 + COSTS_N_INSNS (2);
8179 default:
8180 return 99;
8184 static inline bool
8185 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8187 machine_mode mode = GET_MODE (x);
8188 enum rtx_code subcode;
8189 rtx operand;
8190 enum rtx_code code = GET_CODE (x);
8191 *total = 0;
8193 switch (code)
8195 case MEM:
8196 /* Memory costs quite a lot for the first word, but subsequent words
8197 load at the equivalent of a single insn each. */
8198 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8199 return true;
8201 case DIV:
8202 case MOD:
8203 case UDIV:
8204 case UMOD:
8205 if (TARGET_HARD_FLOAT && mode == SFmode)
8206 *total = COSTS_N_INSNS (2);
8207 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8208 *total = COSTS_N_INSNS (4);
8209 else
8210 *total = COSTS_N_INSNS (20);
8211 return false;
8213 case ROTATE:
8214 if (REG_P (XEXP (x, 1)))
8215 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8216 else if (!CONST_INT_P (XEXP (x, 1)))
8217 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8219 /* Fall through */
8220 case ROTATERT:
8221 if (mode != SImode)
8223 *total += COSTS_N_INSNS (4);
8224 return true;
8227 /* Fall through */
8228 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8229 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8230 if (mode == DImode)
8232 *total += COSTS_N_INSNS (3);
8233 return true;
8236 *total += COSTS_N_INSNS (1);
8237 /* Increase the cost of complex shifts because they aren't any faster,
8238 and they reduce dual-issue opportunities. */
8239 if (arm_tune_cortex_a9
8240 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8241 ++*total;
8243 return true;
8245 case MINUS:
8246 if (mode == DImode)
8248 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8249 if (CONST_INT_P (XEXP (x, 0))
8250 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8252 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8253 return true;
8256 if (CONST_INT_P (XEXP (x, 1))
8257 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8259 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8260 return true;
8263 return false;
8266 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8268 if (TARGET_HARD_FLOAT
8269 && (mode == SFmode
8270 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8272 *total = COSTS_N_INSNS (1);
8273 if (CONST_DOUBLE_P (XEXP (x, 0))
8274 && arm_const_double_rtx (XEXP (x, 0)))
8276 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8277 return true;
8280 if (CONST_DOUBLE_P (XEXP (x, 1))
8281 && arm_const_double_rtx (XEXP (x, 1)))
8283 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8284 return true;
8287 return false;
8289 *total = COSTS_N_INSNS (20);
8290 return false;
8293 *total = COSTS_N_INSNS (1);
8294 if (CONST_INT_P (XEXP (x, 0))
8295 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8297 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8298 return true;
8301 subcode = GET_CODE (XEXP (x, 1));
8302 if (subcode == ASHIFT || subcode == ASHIFTRT
8303 || subcode == LSHIFTRT
8304 || subcode == ROTATE || subcode == ROTATERT)
8306 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8307 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8308 return true;
8311 /* A shift as a part of RSB costs no more than RSB itself. */
8312 if (GET_CODE (XEXP (x, 0)) == MULT
8313 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8315 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8316 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8317 return true;
8320 if (subcode == MULT
8321 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8323 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8324 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8325 return true;
8328 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8329 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8331 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8332 if (REG_P (XEXP (XEXP (x, 1), 0))
8333 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8334 *total += COSTS_N_INSNS (1);
8336 return true;
8339 /* Fall through */
8341 case PLUS:
8342 if (code == PLUS && arm_arch6 && mode == SImode
8343 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8344 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8346 *total = COSTS_N_INSNS (1);
8347 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8348 0, speed);
8349 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8350 return true;
8353 /* MLA: All arguments must be registers. We filter out
8354 multiplication by a power of two, so that we fall through to
8355 the code below. */
8356 if (GET_CODE (XEXP (x, 0)) == MULT
8357 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8359 /* The cost comes from the cost of the multiply. */
8360 return false;
8363 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8365 if (TARGET_HARD_FLOAT
8366 && (mode == SFmode
8367 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8369 *total = COSTS_N_INSNS (1);
8370 if (CONST_DOUBLE_P (XEXP (x, 1))
8371 && arm_const_double_rtx (XEXP (x, 1)))
8373 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8374 return true;
8377 return false;
8380 *total = COSTS_N_INSNS (20);
8381 return false;
8384 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8385 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8387 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8388 if (REG_P (XEXP (XEXP (x, 0), 0))
8389 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8390 *total += COSTS_N_INSNS (1);
8391 return true;
8394 /* Fall through */
8396 case AND: case XOR: case IOR:
8398 /* Normally the frame registers will be split into reg+const during
8399 reload, so it is a bad idea to combine them with other instructions,
8400 since then they might not be moved outside of loops. As a compromise
8401 we allow integration with ops that have a constant as their second
8402 operand. */
8403 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8404 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8405 && !CONST_INT_P (XEXP (x, 1)))
8406 *total = COSTS_N_INSNS (1);
8408 if (mode == DImode)
8410 *total += COSTS_N_INSNS (2);
8411 if (CONST_INT_P (XEXP (x, 1))
8412 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8415 return true;
8418 return false;
8421 *total += COSTS_N_INSNS (1);
8422 if (CONST_INT_P (XEXP (x, 1))
8423 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8425 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8426 return true;
8428 subcode = GET_CODE (XEXP (x, 0));
8429 if (subcode == ASHIFT || subcode == ASHIFTRT
8430 || subcode == LSHIFTRT
8431 || subcode == ROTATE || subcode == ROTATERT)
8433 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8434 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8435 return true;
8438 if (subcode == MULT
8439 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8441 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8442 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8443 return true;
8446 if (subcode == UMIN || subcode == UMAX
8447 || subcode == SMIN || subcode == SMAX)
8449 *total = COSTS_N_INSNS (3);
8450 return true;
8453 return false;
8455 case MULT:
8456 /* This should have been handled by the CPU specific routines. */
8457 gcc_unreachable ();
8459 case TRUNCATE:
8460 if (arm_arch3m && mode == SImode
8461 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8462 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8463 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8464 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8465 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8466 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8468 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8469 return true;
8471 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8472 return false;
8474 case NEG:
8475 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8477 if (TARGET_HARD_FLOAT
8478 && (mode == SFmode
8479 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8481 *total = COSTS_N_INSNS (1);
8482 return false;
8484 *total = COSTS_N_INSNS (2);
8485 return false;
8488 /* Fall through */
8489 case NOT:
8490 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8491 if (mode == SImode && code == NOT)
8493 subcode = GET_CODE (XEXP (x, 0));
8494 if (subcode == ASHIFT || subcode == ASHIFTRT
8495 || subcode == LSHIFTRT
8496 || subcode == ROTATE || subcode == ROTATERT
8497 || (subcode == MULT
8498 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8500 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8501 /* Register shifts cost an extra cycle. */
8502 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8503 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8504 subcode, 1, speed);
8505 return true;
8509 return false;
8511 case IF_THEN_ELSE:
8512 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8514 *total = COSTS_N_INSNS (4);
8515 return true;
8518 operand = XEXP (x, 0);
8520 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8521 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8522 && REG_P (XEXP (operand, 0))
8523 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8524 *total += COSTS_N_INSNS (1);
8525 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8526 + rtx_cost (XEXP (x, 2), code, 2, speed));
8527 return true;
8529 case NE:
8530 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8532 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8533 return true;
8535 goto scc_insn;
8537 case GE:
8538 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8539 && mode == SImode && XEXP (x, 1) == const0_rtx)
8541 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8542 return true;
8544 goto scc_insn;
8546 case LT:
8547 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8548 && mode == SImode && XEXP (x, 1) == const0_rtx)
8550 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8551 return true;
8553 goto scc_insn;
8555 case EQ:
8556 case GT:
8557 case LE:
8558 case GEU:
8559 case LTU:
8560 case GTU:
8561 case LEU:
8562 case UNORDERED:
8563 case ORDERED:
8564 case UNEQ:
8565 case UNGE:
8566 case UNLT:
8567 case UNGT:
8568 case UNLE:
8569 scc_insn:
8570 /* SCC insns. If the comparison has already been performed, they
8571 cost 2 instructions. Otherwise they need an additional comparison
8572 before them. */
8573 *total = COSTS_N_INSNS (2);
8574 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8576 return true;
8579 /* Fall through */
8580 case COMPARE:
8581 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8583 *total = 0;
8584 return true;
8587 *total += COSTS_N_INSNS (1);
8588 if (CONST_INT_P (XEXP (x, 1))
8589 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8591 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8592 return true;
8595 subcode = GET_CODE (XEXP (x, 0));
8596 if (subcode == ASHIFT || subcode == ASHIFTRT
8597 || subcode == LSHIFTRT
8598 || subcode == ROTATE || subcode == ROTATERT)
8600 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8601 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8602 return true;
8605 if (subcode == MULT
8606 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8608 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8609 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8610 return true;
8613 return false;
8615 case UMIN:
8616 case UMAX:
8617 case SMIN:
8618 case SMAX:
8619 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8620 if (!CONST_INT_P (XEXP (x, 1))
8621 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8622 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8623 return true;
8625 case ABS:
8626 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8628 if (TARGET_HARD_FLOAT
8629 && (mode == SFmode
8630 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8632 *total = COSTS_N_INSNS (1);
8633 return false;
8635 *total = COSTS_N_INSNS (20);
8636 return false;
8638 *total = COSTS_N_INSNS (1);
8639 if (mode == DImode)
8640 *total += COSTS_N_INSNS (3);
8641 return false;
8643 case SIGN_EXTEND:
8644 case ZERO_EXTEND:
8645 *total = 0;
8646 if (GET_MODE_CLASS (mode) == MODE_INT)
8648 rtx op = XEXP (x, 0);
8649 machine_mode opmode = GET_MODE (op);
8651 if (mode == DImode)
8652 *total += COSTS_N_INSNS (1);
8654 if (opmode != SImode)
8656 if (MEM_P (op))
8658 /* If !arm_arch4, we use one of the extendhisi2_mem
8659 or movhi_bytes patterns for HImode. For a QImode
8660 sign extension, we first zero-extend from memory
8661 and then perform a shift sequence. */
8662 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8663 *total += COSTS_N_INSNS (2);
8665 else if (arm_arch6)
8666 *total += COSTS_N_INSNS (1);
8668 /* We don't have the necessary insn, so we need to perform some
8669 other operation. */
8670 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8671 /* An and with constant 255. */
8672 *total += COSTS_N_INSNS (1);
8673 else
8674 /* A shift sequence. Increase costs slightly to avoid
8675 combining two shifts into an extend operation. */
8676 *total += COSTS_N_INSNS (2) + 1;
8679 return false;
8682 switch (GET_MODE (XEXP (x, 0)))
8684 case V8QImode:
8685 case V4HImode:
8686 case V2SImode:
8687 case V4QImode:
8688 case V2HImode:
8689 *total = COSTS_N_INSNS (1);
8690 return false;
8692 default:
8693 gcc_unreachable ();
8695 gcc_unreachable ();
8697 case ZERO_EXTRACT:
8698 case SIGN_EXTRACT:
8699 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8700 return true;
8702 case CONST_INT:
8703 if (const_ok_for_arm (INTVAL (x))
8704 || const_ok_for_arm (~INTVAL (x)))
8705 *total = COSTS_N_INSNS (1);
8706 else
8707 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8708 INTVAL (x), NULL_RTX,
8709 NULL_RTX, 0, 0));
8710 return true;
8712 case CONST:
8713 case LABEL_REF:
8714 case SYMBOL_REF:
8715 *total = COSTS_N_INSNS (3);
8716 return true;
8718 case HIGH:
8719 *total = COSTS_N_INSNS (1);
8720 return true;
8722 case LO_SUM:
8723 *total = COSTS_N_INSNS (1);
8724 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8725 return true;
8727 case CONST_DOUBLE:
8728 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8729 && (mode == SFmode || !TARGET_VFP_SINGLE))
8730 *total = COSTS_N_INSNS (1);
8731 else
8732 *total = COSTS_N_INSNS (4);
8733 return true;
8735 case SET:
8736 /* The vec_extract patterns accept memory operands that require an
8737 address reload. Account for the cost of that reload to give the
8738 auto-inc-dec pass an incentive to try to replace them. */
8739 if (TARGET_NEON && MEM_P (SET_DEST (x))
8740 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8742 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8743 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8744 *total += COSTS_N_INSNS (1);
8745 return true;
8747 /* Likewise for the vec_set patterns. */
8748 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8749 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8750 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8752 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8753 *total = rtx_cost (mem, code, 0, speed);
8754 if (!neon_vector_mem_operand (mem, 2, true))
8755 *total += COSTS_N_INSNS (1);
8756 return true;
8758 return false;
8760 case UNSPEC:
8761 /* We cost this as high as our memory costs so that it can be
8762 hoisted out of loops. */
8763 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8765 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8767 return true;
8769 case CONST_VECTOR:
8770 if (TARGET_NEON
8771 && TARGET_HARD_FLOAT
8772 && outer == SET
8773 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8774 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8775 *total = COSTS_N_INSNS (1);
8776 else
8777 *total = COSTS_N_INSNS (4);
8778 return true;
8780 default:
8781 *total = COSTS_N_INSNS (4);
8782 return false;
8786 /* Estimate the size cost of thumb1 instructions.
8787 For now most of the code is copied from thumb1_rtx_costs. We need more
8788 fine-grained tuning when we have more related test cases. */
8789 static inline int
8790 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8792 machine_mode mode = GET_MODE (x);
8793 int words;
8795 switch (code)
8797 case ASHIFT:
8798 case ASHIFTRT:
8799 case LSHIFTRT:
8800 case ROTATERT:
8801 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8803 case PLUS:
8804 case MINUS:
8805 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8806 patterns defined by RTL expansion, especially for the expansion of
8807 multiplication. */
8808 if ((GET_CODE (XEXP (x, 0)) == MULT
8809 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8810 || (GET_CODE (XEXP (x, 1)) == MULT
8811 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8812 return COSTS_N_INSNS (2);
8813 /* Deliberately fall through for normal RTX. */
8814 case COMPARE:
8815 case NEG:
8816 case NOT:
8817 return COSTS_N_INSNS (1);
8819 case MULT:
8820 if (CONST_INT_P (XEXP (x, 1)))
8822 /* The Thumb1 mul instruction can't operate on a constant. We must
8823 load it into a register first. */
8824 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8825 /* For the targets which have a very small and high-latency multiply
8826 unit, we prefer to synthesize the mult with up to 5 instructions,
8827 giving a good balance between size and performance. */
8828 if (arm_arch6m && arm_m_profile_small_mul)
8829 return COSTS_N_INSNS (5);
8830 else
8831 return COSTS_N_INSNS (1) + const_size;
8833 return COSTS_N_INSNS (1);
8835 case SET:
8836 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8837 the mode. */
8838 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8839 return COSTS_N_INSNS (words)
8840 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8841 || satisfies_constraint_K (SET_SRC (x))
8842 /* thumb1_movdi_insn. */
8843 || ((words > 1) && MEM_P (SET_SRC (x))));
8845 case CONST_INT:
8846 if (outer == SET)
8848 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8849 return COSTS_N_INSNS (1);
8850 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8851 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8852 return COSTS_N_INSNS (2);
8853 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8854 if (thumb_shiftable_const (INTVAL (x)))
8855 return COSTS_N_INSNS (2);
8856 return COSTS_N_INSNS (3);
8858 else if ((outer == PLUS || outer == COMPARE)
8859 && INTVAL (x) < 256 && INTVAL (x) > -256)
8860 return 0;
8861 else if ((outer == IOR || outer == XOR || outer == AND)
8862 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8863 return COSTS_N_INSNS (1);
8864 else if (outer == AND)
8866 int i;
8867 /* This duplicates the tests in the andsi3 expander. */
8868 for (i = 9; i <= 31; i++)
8869 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8870 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8871 return COSTS_N_INSNS (2);
8873 else if (outer == ASHIFT || outer == ASHIFTRT
8874 || outer == LSHIFTRT)
8875 return 0;
8876 return COSTS_N_INSNS (2);
8878 case CONST:
8879 case CONST_DOUBLE:
8880 case LABEL_REF:
8881 case SYMBOL_REF:
8882 return COSTS_N_INSNS (3);
8884 case UDIV:
8885 case UMOD:
8886 case DIV:
8887 case MOD:
8888 return 100;
8890 case TRUNCATE:
8891 return 99;
8893 case AND:
8894 case XOR:
8895 case IOR:
8896 return COSTS_N_INSNS (1);
8898 case MEM:
8899 return (COSTS_N_INSNS (1)
8900 + COSTS_N_INSNS (1)
8901 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8902 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8903 ? COSTS_N_INSNS (1) : 0));
8905 case IF_THEN_ELSE:
8906 /* XXX a guess. */
8907 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8908 return 14;
8909 return 2;
8911 case ZERO_EXTEND:
8912 /* XXX still guessing. */
8913 switch (GET_MODE (XEXP (x, 0)))
8915 case QImode:
8916 return (1 + (mode == DImode ? 4 : 0)
8917 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8919 case HImode:
8920 return (4 + (mode == DImode ? 4 : 0)
8921 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8923 case SImode:
8924 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8926 default:
8927 return 99;
8930 default:
8931 return 99;
8935 /* RTX costs when optimizing for size. */
8936 static bool
8937 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8938 int *total)
8940 machine_mode mode = GET_MODE (x);
8941 if (TARGET_THUMB1)
8943 *total = thumb1_size_rtx_costs (x, code, outer_code);
8944 return true;
8947 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8948 switch (code)
8950 case MEM:
8951 /* A memory access costs 1 insn if the mode is small, or the address is
8952 a single register, otherwise it costs one insn per word. */
8953 if (REG_P (XEXP (x, 0)))
8954 *total = COSTS_N_INSNS (1);
8955 else if (flag_pic
8956 && GET_CODE (XEXP (x, 0)) == PLUS
8957 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8958 /* This will be split into two instructions.
8959 See arm.md:calculate_pic_address. */
8960 *total = COSTS_N_INSNS (2);
8961 else
8962 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8963 return true;
8965 case DIV:
8966 case MOD:
8967 case UDIV:
8968 case UMOD:
8969 /* Needs a libcall, so it costs about this. */
8970 *total = COSTS_N_INSNS (2);
8971 return false;
8973 case ROTATE:
8974 if (mode == SImode && REG_P (XEXP (x, 1)))
8976 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8977 return true;
8979 /* Fall through */
8980 case ROTATERT:
8981 case ASHIFT:
8982 case LSHIFTRT:
8983 case ASHIFTRT:
8984 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8986 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8987 return true;
8989 else if (mode == SImode)
8991 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8992 /* Slightly disparage register shifts, but not by much. */
8993 if (!CONST_INT_P (XEXP (x, 1)))
8994 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8995 return true;
8998 /* Needs a libcall. */
8999 *total = COSTS_N_INSNS (2);
9000 return false;
9002 case MINUS:
9003 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9004 && (mode == SFmode || !TARGET_VFP_SINGLE))
9006 *total = COSTS_N_INSNS (1);
9007 return false;
9010 if (mode == SImode)
9012 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9013 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9015 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9016 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9017 || subcode1 == ROTATE || subcode1 == ROTATERT
9018 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9019 || subcode1 == ASHIFTRT)
9021 /* It's just the cost of the two operands. */
9022 *total = 0;
9023 return false;
9026 *total = COSTS_N_INSNS (1);
9027 return false;
9030 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9031 return false;
9033 case PLUS:
9034 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9035 && (mode == SFmode || !TARGET_VFP_SINGLE))
9037 *total = COSTS_N_INSNS (1);
9038 return false;
9041 /* A shift as a part of ADD costs nothing. */
9042 if (GET_CODE (XEXP (x, 0)) == MULT
9043 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9045 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9046 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9047 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9048 return true;
9051 /* Fall through */
9052 case AND: case XOR: case IOR:
9053 if (mode == SImode)
9055 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9057 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9058 || subcode == LSHIFTRT || subcode == ASHIFTRT
9059 || (code == AND && subcode == NOT))
9061 /* It's just the cost of the two operands. */
9062 *total = 0;
9063 return false;
9067 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9068 return false;
9070 case MULT:
9071 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9072 return false;
9074 case NEG:
9075 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9076 && (mode == SFmode || !TARGET_VFP_SINGLE))
9078 *total = COSTS_N_INSNS (1);
9079 return false;
9082 /* Fall through */
9083 case NOT:
9084 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9086 return false;
9088 case IF_THEN_ELSE:
9089 *total = 0;
9090 return false;
9092 case COMPARE:
9093 if (cc_register (XEXP (x, 0), VOIDmode))
9094 *total = 0;
9095 else
9096 *total = COSTS_N_INSNS (1);
9097 return false;
9099 case ABS:
9100 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9101 && (mode == SFmode || !TARGET_VFP_SINGLE))
9102 *total = COSTS_N_INSNS (1);
9103 else
9104 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9105 return false;
9107 case SIGN_EXTEND:
9108 case ZERO_EXTEND:
9109 return arm_rtx_costs_1 (x, outer_code, total, 0);
9111 case CONST_INT:
9112 if (const_ok_for_arm (INTVAL (x)))
9113 /* A multiplication by a constant requires another instruction
9114 to load the constant to a register. */
9115 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9116 ? 1 : 0);
9117 else if (const_ok_for_arm (~INTVAL (x)))
9118 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9119 else if (const_ok_for_arm (-INTVAL (x)))
9121 if (outer_code == COMPARE || outer_code == PLUS
9122 || outer_code == MINUS)
9123 *total = 0;
9124 else
9125 *total = COSTS_N_INSNS (1);
9127 else
9128 *total = COSTS_N_INSNS (2);
9129 return true;
9131 case CONST:
9132 case LABEL_REF:
9133 case SYMBOL_REF:
9134 *total = COSTS_N_INSNS (2);
9135 return true;
9137 case CONST_DOUBLE:
9138 *total = COSTS_N_INSNS (4);
9139 return true;
9141 case CONST_VECTOR:
9142 if (TARGET_NEON
9143 && TARGET_HARD_FLOAT
9144 && outer_code == SET
9145 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9146 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9147 *total = COSTS_N_INSNS (1);
9148 else
9149 *total = COSTS_N_INSNS (4);
9150 return true;
9152 case HIGH:
9153 case LO_SUM:
9154 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9155 cost of these slightly. */
9156 *total = COSTS_N_INSNS (1) + 1;
9157 return true;
9159 case SET:
9160 return false;
9162 default:
9163 if (mode != VOIDmode)
9164 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9165 else
9166 *total = COSTS_N_INSNS (4); /* Who knows? */
9167 return false;
9171 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9172 operand, then return the operand that is being shifted. If the shift
9173 is not by a constant, then set SHIFT_REG to point to the operand.
9174 Return NULL if OP is not a shifter operand. */
9175 static rtx
9176 shifter_op_p (rtx op, rtx *shift_reg)
9178 enum rtx_code code = GET_CODE (op);
9180 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9181 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9182 return XEXP (op, 0);
9183 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9184 return XEXP (op, 0);
9185 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9186 || code == ASHIFTRT)
9188 if (!CONST_INT_P (XEXP (op, 1)))
9189 *shift_reg = XEXP (op, 1);
9190 return XEXP (op, 0);
9193 return NULL;
9196 static bool
9197 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9199 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9200 gcc_assert (GET_CODE (x) == UNSPEC);
9202 switch (XINT (x, 1))
9204 case UNSPEC_UNALIGNED_LOAD:
9205 /* We can only do unaligned loads into the integer unit, and we can't
9206 use LDM or LDRD. */
9207 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9208 if (speed_p)
9209 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9210 + extra_cost->ldst.load_unaligned);
9212 #ifdef NOT_YET
9213 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9214 ADDR_SPACE_GENERIC, speed_p);
9215 #endif
9216 return true;
9218 case UNSPEC_UNALIGNED_STORE:
9219 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9220 if (speed_p)
9221 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9222 + extra_cost->ldst.store_unaligned);
9224 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9225 #ifdef NOT_YET
9226 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9227 ADDR_SPACE_GENERIC, speed_p);
9228 #endif
9229 return true;
9231 case UNSPEC_VRINTZ:
9232 case UNSPEC_VRINTP:
9233 case UNSPEC_VRINTM:
9234 case UNSPEC_VRINTR:
9235 case UNSPEC_VRINTX:
9236 case UNSPEC_VRINTA:
9237 *cost = COSTS_N_INSNS (1);
9238 if (speed_p)
9239 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9241 return true;
9242 default:
9243 *cost = COSTS_N_INSNS (2);
9244 break;
9246 return false;
9249 /* Cost of a libcall. We assume one insn per argument, an amount for the
9250 call (one insn for -Os) and then one for processing the result. */
9251 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
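/* For instance (illustrative): LIBCALL_COST (2) expands to
   COSTS_N_INSNS (20) when optimizing for speed and COSTS_N_INSNS (4)
   when optimizing for size.  */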
9253 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9254 do \
9256 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9257 if (shift_op != NULL \
9258 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9260 if (shift_reg) \
9262 if (speed_p) \
9263 *cost += extra_cost->alu.arith_shift_reg; \
9264 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9266 else if (speed_p) \
9267 *cost += extra_cost->alu.arith_shift; \
9269 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9270 + rtx_cost (XEXP (x, 1 - IDX), \
9271 OP, 1, speed_p)); \
9272 return true; \
9275 while (0);
9277 /* RTX costs. Make an estimate of the cost of executing the operation
9278 X, which is contained with an operation with code OUTER_CODE.
9279 SPEED_P indicates whether the cost desired is the performance cost,
9280 or the size cost. The estimate is stored in COST and the return
9281 value is TRUE if the cost calculation is final, or FALSE if the
9282 caller should recurse through the operands of X to add additional
9283 costs.
9285 We currently make no attempt to model the size savings of Thumb-2
9286 16-bit instructions. At the normal points in compilation where
9287 this code is called we have no measure of whether the condition
9288 flags are live or not, and thus no realistic way to determine what
9289 the size will eventually be. */
9290 static bool
9291 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9292 const struct cpu_cost_table *extra_cost,
9293 int *cost, bool speed_p)
9295 machine_mode mode = GET_MODE (x);
9297 if (TARGET_THUMB1)
9299 if (speed_p)
9300 *cost = thumb1_rtx_costs (x, code, outer_code);
9301 else
9302 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9303 return true;
9306 switch (code)
9308 case SET:
9309 *cost = 0;
9310 /* SET RTXs don't have a mode so we get it from the destination. */
9311 mode = GET_MODE (SET_DEST (x));
9313 if (REG_P (SET_SRC (x))
9314 && REG_P (SET_DEST (x)))
9316 /* Assume that most copies can be done with a single insn,
9317 unless we don't have HW FP, in which case everything
9318 larger than word mode will require two insns. */
9319 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9320 && GET_MODE_SIZE (mode) > 4)
9321 || mode == DImode)
9322 ? 2 : 1);
9323 /* Conditional register moves can be encoded
9324 in 16 bits in Thumb mode. */
9325 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9326 *cost >>= 1;
9328 return true;
9331 if (CONST_INT_P (SET_SRC (x)))
9333 /* Handle CONST_INT here, since the value doesn't have a mode
9334 and we would otherwise be unable to work out the true cost. */
9335 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9336 outer_code = SET;
9337 /* Slightly lower the cost of setting a core reg to a constant.
9338 This helps break up chains and allows for better scheduling. */
9339 if (REG_P (SET_DEST (x))
9340 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9341 *cost -= 1;
9342 x = SET_SRC (x);
9343 /* Immediate moves with an immediate in the range [0, 255] can be
9344 encoded in 16 bits in Thumb mode. */
9345 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9346 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9347 *cost >>= 1;
9348 goto const_int_cost;
9351 return false;
9353 case MEM:
9354 /* A memory access costs 1 insn if the mode is small, or the address is
9355 a single register, otherwise it costs one insn per word. */
9356 if (REG_P (XEXP (x, 0)))
9357 *cost = COSTS_N_INSNS (1);
9358 else if (flag_pic
9359 && GET_CODE (XEXP (x, 0)) == PLUS
9360 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9361 /* This will be split into two instructions.
9362 See arm.md:calculate_pic_address. */
9363 *cost = COSTS_N_INSNS (2);
9364 else
9365 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9367 /* For speed optimizations, add the costs of the address and
9368 accessing memory. */
9369 if (speed_p)
9370 #ifdef NOT_YET
9371 *cost += (extra_cost->ldst.load
9372 + arm_address_cost (XEXP (x, 0), mode,
9373 ADDR_SPACE_GENERIC, speed_p));
9374 #else
9375 *cost += extra_cost->ldst.load;
9376 #endif
9377 return true;
9379 case PARALLEL:
9381 /* Calculations of LDM costs are complex. We assume an initial cost
9382 (ldm_1st) which covers loading the first ldm_regs_per_insn_1st
9383 registers; then each additional ldm_regs_per_insn_subsequent
9384 registers cost one more insn. The
9385 formula for N regs is thus:
9387 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9388 + ldm_regs_per_insn_subsequent - 1)
9389 / ldm_regs_per_insn_subsequent).
9391 Additional costs may also be added for addressing. A similar
9392 formula is used for STM. */
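/* Worked example of the formula above (the per-CPU numbers are purely
   illustrative): with ldm_regs_per_insn_1st == 4,
   ldm_regs_per_insn_subsequent == 2 and N == 7 registers, the extra cost is
   ldm_1st + COSTS_N_INSNS ((MAX (7 - 4, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */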
9394 bool is_ldm = load_multiple_operation (x, SImode);
9395 bool is_stm = store_multiple_operation (x, SImode);
9397 *cost = COSTS_N_INSNS (1);
9399 if (is_ldm || is_stm)
9401 if (speed_p)
9403 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9404 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9405 ? extra_cost->ldst.ldm_regs_per_insn_1st
9406 : extra_cost->ldst.stm_regs_per_insn_1st;
9407 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9408 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9409 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9411 *cost += regs_per_insn_1st
9412 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9413 + regs_per_insn_sub - 1)
9414 / regs_per_insn_sub);
9415 return true;
9419 return false;
9421 case DIV:
9422 case UDIV:
9423 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9424 && (mode == SFmode || !TARGET_VFP_SINGLE))
9425 *cost = COSTS_N_INSNS (speed_p
9426 ? extra_cost->fp[mode != SFmode].div : 1);
9427 else if (mode == SImode && TARGET_IDIV)
9428 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9429 else
9430 *cost = LIBCALL_COST (2);
9431 return false; /* All arguments must be in registers. */
9433 case MOD:
9434 case UMOD:
9435 *cost = LIBCALL_COST (2);
9436 return false; /* All arguments must be in registers. */
9438 case ROTATE:
9439 if (mode == SImode && REG_P (XEXP (x, 1)))
9441 *cost = (COSTS_N_INSNS (2)
9442 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9443 if (speed_p)
9444 *cost += extra_cost->alu.shift_reg;
9445 return true;
9447 /* Fall through */
9448 case ROTATERT:
9449 case ASHIFT:
9450 case LSHIFTRT:
9451 case ASHIFTRT:
9452 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9454 *cost = (COSTS_N_INSNS (3)
9455 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9456 if (speed_p)
9457 *cost += 2 * extra_cost->alu.shift;
9458 return true;
9460 else if (mode == SImode)
9462 *cost = (COSTS_N_INSNS (1)
9463 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9464 /* Slightly disparage register shifts at -Os, but not by much. */
9465 if (!CONST_INT_P (XEXP (x, 1)))
9466 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9467 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9468 return true;
9470 else if (GET_MODE_CLASS (mode) == MODE_INT
9471 && GET_MODE_SIZE (mode) < 4)
9473 if (code == ASHIFT)
9475 *cost = (COSTS_N_INSNS (1)
9476 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9477 /* Slightly disparage register shifts at -Os, but not by
9478 much. */
9479 if (!CONST_INT_P (XEXP (x, 1)))
9480 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9481 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9483 else if (code == LSHIFTRT || code == ASHIFTRT)
9485 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9487 /* Can use SBFX/UBFX. */
9488 *cost = COSTS_N_INSNS (1);
9489 if (speed_p)
9490 *cost += extra_cost->alu.bfx;
9491 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9493 else
9495 *cost = COSTS_N_INSNS (2);
9496 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9497 if (speed_p)
9499 if (CONST_INT_P (XEXP (x, 1)))
9500 *cost += 2 * extra_cost->alu.shift;
9501 else
9502 *cost += (extra_cost->alu.shift
9503 + extra_cost->alu.shift_reg);
9505 else
9506 /* Slightly disparage register shifts. */
9507 *cost += !CONST_INT_P (XEXP (x, 1));
9510 else /* Rotates. */
9512 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9513 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9514 if (speed_p)
9516 if (CONST_INT_P (XEXP (x, 1)))
9517 *cost += (2 * extra_cost->alu.shift
9518 + extra_cost->alu.log_shift);
9519 else
9520 *cost += (extra_cost->alu.shift
9521 + extra_cost->alu.shift_reg
9522 + extra_cost->alu.log_shift_reg);
9525 return true;
9528 *cost = LIBCALL_COST (2);
9529 return false;
9531 case BSWAP:
9532 if (arm_arch6)
9534 if (mode == SImode)
9536 *cost = COSTS_N_INSNS (1);
9537 if (speed_p)
9538 *cost += extra_cost->alu.rev;
9540 return false;
9543 else
9545 /* No rev instruction available. Look at arm_legacy_rev
9546 and thumb_legacy_rev for the form of RTL used then. */
9547 if (TARGET_THUMB)
9549 *cost = COSTS_N_INSNS (10);
9551 if (speed_p)
9553 *cost += 6 * extra_cost->alu.shift;
9554 *cost += 3 * extra_cost->alu.logical;
9557 else
9559 *cost = COSTS_N_INSNS (5);
9561 if (speed_p)
9563 *cost += 2 * extra_cost->alu.shift;
9564 *cost += extra_cost->alu.arith_shift;
9565 *cost += 2 * extra_cost->alu.logical;
9568 return true;
9570 return false;
9572 case MINUS:
9573 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9574 && (mode == SFmode || !TARGET_VFP_SINGLE))
9576 *cost = COSTS_N_INSNS (1);
9577 if (GET_CODE (XEXP (x, 0)) == MULT
9578 || GET_CODE (XEXP (x, 1)) == MULT)
9580 rtx mul_op0, mul_op1, sub_op;
9582 if (speed_p)
9583 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9585 if (GET_CODE (XEXP (x, 0)) == MULT)
9587 mul_op0 = XEXP (XEXP (x, 0), 0);
9588 mul_op1 = XEXP (XEXP (x, 0), 1);
9589 sub_op = XEXP (x, 1);
9591 else
9593 mul_op0 = XEXP (XEXP (x, 1), 0);
9594 mul_op1 = XEXP (XEXP (x, 1), 1);
9595 sub_op = XEXP (x, 0);
9598 /* The first operand of the multiply may be optionally
9599 negated. */
9600 if (GET_CODE (mul_op0) == NEG)
9601 mul_op0 = XEXP (mul_op0, 0);
9603 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9604 + rtx_cost (mul_op1, code, 0, speed_p)
9605 + rtx_cost (sub_op, code, 0, speed_p));
9607 return true;
9610 if (speed_p)
9611 *cost += extra_cost->fp[mode != SFmode].addsub;
9612 return false;
9615 if (mode == SImode)
9617 rtx shift_by_reg = NULL;
9618 rtx shift_op;
9619 rtx non_shift_op;
9621 *cost = COSTS_N_INSNS (1);
9623 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9624 if (shift_op == NULL)
9626 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9627 non_shift_op = XEXP (x, 0);
9629 else
9630 non_shift_op = XEXP (x, 1);
9632 if (shift_op != NULL)
9634 if (shift_by_reg != NULL)
9636 if (speed_p)
9637 *cost += extra_cost->alu.arith_shift_reg;
9638 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9640 else if (speed_p)
9641 *cost += extra_cost->alu.arith_shift;
9643 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9644 + rtx_cost (non_shift_op, code, 0, speed_p));
9645 return true;
9648 if (arm_arch_thumb2
9649 && GET_CODE (XEXP (x, 1)) == MULT)
9651 /* MLS. */
9652 if (speed_p)
9653 *cost += extra_cost->mult[0].add;
9654 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9655 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9656 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9657 return true;
9660 if (CONST_INT_P (XEXP (x, 0)))
9662 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9663 INTVAL (XEXP (x, 0)), NULL_RTX,
9664 NULL_RTX, 1, 0);
9665 *cost = COSTS_N_INSNS (insns);
9666 if (speed_p)
9667 *cost += insns * extra_cost->alu.arith;
9668 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9669 return true;
9671 else if (speed_p)
9672 *cost += extra_cost->alu.arith;
9674 return false;
9677 if (GET_MODE_CLASS (mode) == MODE_INT
9678 && GET_MODE_SIZE (mode) < 4)
9680 rtx shift_op, shift_reg;
9681 shift_reg = NULL;
9683 /* We check both sides of the MINUS for shifter operands since,
9684 unlike PLUS, it's not commutative. */
9686 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9687 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9689 /* Slightly disparage, as we might need to widen the result. */
9690 *cost = 1 + COSTS_N_INSNS (1);
9691 if (speed_p)
9692 *cost += extra_cost->alu.arith;
9694 if (CONST_INT_P (XEXP (x, 0)))
9696 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9697 return true;
9700 return false;
9703 if (mode == DImode)
9705 *cost = COSTS_N_INSNS (2);
9707 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9709 rtx op1 = XEXP (x, 1);
9711 if (speed_p)
9712 *cost += 2 * extra_cost->alu.arith;
9714 if (GET_CODE (op1) == ZERO_EXTEND)
9715 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9716 else
9717 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9718 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9719 0, speed_p);
9720 return true;
9722 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9724 if (speed_p)
9725 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9726 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9727 0, speed_p)
9728 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9729 return true;
9731 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9732 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9734 if (speed_p)
9735 *cost += (extra_cost->alu.arith
9736 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9737 ? extra_cost->alu.arith
9738 : extra_cost->alu.arith_shift));
9739 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9740 + rtx_cost (XEXP (XEXP (x, 1), 0),
9741 GET_CODE (XEXP (x, 1)), 0, speed_p));
9742 return true;
9745 if (speed_p)
9746 *cost += 2 * extra_cost->alu.arith;
9747 return false;
9750 /* Vector mode? */
9752 *cost = LIBCALL_COST (2);
9753 return false;
9755 case PLUS:
9756 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9757 && (mode == SFmode || !TARGET_VFP_SINGLE))
9759 *cost = COSTS_N_INSNS (1);
9760 if (GET_CODE (XEXP (x, 0)) == MULT)
9762 rtx mul_op0, mul_op1, add_op;
9764 if (speed_p)
9765 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9767 mul_op0 = XEXP (XEXP (x, 0), 0);
9768 mul_op1 = XEXP (XEXP (x, 0), 1);
9769 add_op = XEXP (x, 1);
9771 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9772 + rtx_cost (mul_op1, code, 0, speed_p)
9773 + rtx_cost (add_op, code, 0, speed_p));
9775 return true;
9778 if (speed_p)
9779 *cost += extra_cost->fp[mode != SFmode].addsub;
9780 return false;
9782 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9784 *cost = LIBCALL_COST (2);
9785 return false;
9788 /* Narrow modes can be synthesized in SImode, but the range
9789 of useful sub-operations is limited. Check for shift operations
9790 on one of the operands. Only left shifts can be used in the
9791 narrow modes. */
9792 if (GET_MODE_CLASS (mode) == MODE_INT
9793 && GET_MODE_SIZE (mode) < 4)
9795 rtx shift_op, shift_reg;
9796 shift_reg = NULL;
9798 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9800 if (CONST_INT_P (XEXP (x, 1)))
9802 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9803 INTVAL (XEXP (x, 1)), NULL_RTX,
9804 NULL_RTX, 1, 0);
9805 *cost = COSTS_N_INSNS (insns);
9806 if (speed_p)
9807 *cost += insns * extra_cost->alu.arith;
9808 /* Slightly penalize a narrow operation as the result may
9809 need widening. */
9810 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9811 return true;
9814 /* Slightly penalize a narrow operation as the result may
9815 need widening. */
9816 *cost = 1 + COSTS_N_INSNS (1);
9817 if (speed_p)
9818 *cost += extra_cost->alu.arith;
9820 return false;
9823 if (mode == SImode)
9825 rtx shift_op, shift_reg;
9827 *cost = COSTS_N_INSNS (1);
9828 if (TARGET_INT_SIMD
9829 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9830 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9832 /* UXTA[BH] or SXTA[BH]. */
9833 if (speed_p)
9834 *cost += extra_cost->alu.extend_arith;
9835 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9836 speed_p)
9837 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9838 return true;
9841 shift_reg = NULL;
9842 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9843 if (shift_op != NULL)
9845 if (shift_reg)
9847 if (speed_p)
9848 *cost += extra_cost->alu.arith_shift_reg;
9849 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9851 else if (speed_p)
9852 *cost += extra_cost->alu.arith_shift;
9854 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9855 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9856 return true;
9858 if (GET_CODE (XEXP (x, 0)) == MULT)
9860 rtx mul_op = XEXP (x, 0);
9862 *cost = COSTS_N_INSNS (1);
9864 if (TARGET_DSP_MULTIPLY
9865 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9866 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9867 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9868 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9869 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9870 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9871 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9872 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9873 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9874 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9875 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9876 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9877 == 16))))))
9879 /* SMLA[BT][BT]. */
9880 if (speed_p)
9881 *cost += extra_cost->mult[0].extend_add;
9882 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9883 SIGN_EXTEND, 0, speed_p)
9884 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9885 SIGN_EXTEND, 0, speed_p)
9886 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9887 return true;
9890 if (speed_p)
9891 *cost += extra_cost->mult[0].add;
9892 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9893 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9894 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9895 return true;
9897 if (CONST_INT_P (XEXP (x, 1)))
9899 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9900 INTVAL (XEXP (x, 1)), NULL_RTX,
9901 NULL_RTX, 1, 0);
9902 *cost = COSTS_N_INSNS (insns);
9903 if (speed_p)
9904 *cost += insns * extra_cost->alu.arith;
9905 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9906 return true;
9908 else if (speed_p)
9909 *cost += extra_cost->alu.arith;
9911 return false;
9914 if (mode == DImode)
9916 if (arm_arch3m
9917 && GET_CODE (XEXP (x, 0)) == MULT
9918 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9919 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9920 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9921 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9923 *cost = COSTS_N_INSNS (1);
9924 if (speed_p)
9925 *cost += extra_cost->mult[1].extend_add;
9926 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9927 ZERO_EXTEND, 0, speed_p)
9928 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9929 ZERO_EXTEND, 0, speed_p)
9930 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9931 return true;
9934 *cost = COSTS_N_INSNS (2);
9936 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9937 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9939 if (speed_p)
9940 *cost += (extra_cost->alu.arith
9941 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9942 ? extra_cost->alu.arith
9943 : extra_cost->alu.arith_shift));
9945 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9946 speed_p)
9947 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9948 return true;
9951 if (speed_p)
9952 *cost += 2 * extra_cost->alu.arith;
9953 return false;
9956 /* Vector mode? */
9957 *cost = LIBCALL_COST (2);
9958 return false;
9959 case IOR:
9960 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9962 *cost = COSTS_N_INSNS (1);
9963 if (speed_p)
9964 *cost += extra_cost->alu.rev;
9966 return true;
9968 /* Fall through. */
9969 case AND: case XOR:
9970 if (mode == SImode)
9972 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9973 rtx op0 = XEXP (x, 0);
9974 rtx shift_op, shift_reg;
9976 *cost = COSTS_N_INSNS (1);
9978 if (subcode == NOT
9979 && (code == AND
9980 || (code == IOR && TARGET_THUMB2)))
9981 op0 = XEXP (op0, 0);
9983 shift_reg = NULL;
9984 shift_op = shifter_op_p (op0, &shift_reg);
9985 if (shift_op != NULL)
9987 if (shift_reg)
9989 if (speed_p)
9990 *cost += extra_cost->alu.log_shift_reg;
9991 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9993 else if (speed_p)
9994 *cost += extra_cost->alu.log_shift;
9996 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9997 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9998 return true;
10001 if (CONST_INT_P (XEXP (x, 1)))
10003 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10004 INTVAL (XEXP (x, 1)), NULL_RTX,
10005 NULL_RTX, 1, 0);
10007 *cost = COSTS_N_INSNS (insns);
10008 if (speed_p)
10009 *cost += insns * extra_cost->alu.logical;
10010 *cost += rtx_cost (op0, code, 0, speed_p);
10011 return true;
10014 if (speed_p)
10015 *cost += extra_cost->alu.logical;
10016 *cost += (rtx_cost (op0, code, 0, speed_p)
10017 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10018 return true;
10021 if (mode == DImode)
10023 rtx op0 = XEXP (x, 0);
10024 enum rtx_code subcode = GET_CODE (op0);
10026 *cost = COSTS_N_INSNS (2);
10028 if (subcode == NOT
10029 && (code == AND
10030 || (code == IOR && TARGET_THUMB2)))
10031 op0 = XEXP (op0, 0);
10033 if (GET_CODE (op0) == ZERO_EXTEND)
10035 if (speed_p)
10036 *cost += 2 * extra_cost->alu.logical;
10038 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10039 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10040 return true;
10042 else if (GET_CODE (op0) == SIGN_EXTEND)
10044 if (speed_p)
10045 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10047 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10048 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10049 return true;
10052 if (speed_p)
10053 *cost += 2 * extra_cost->alu.logical;
10055 return true;
10057 /* Vector mode? */
10059 *cost = LIBCALL_COST (2);
10060 return false;
10062 case MULT:
10063 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10064 && (mode == SFmode || !TARGET_VFP_SINGLE))
10066 rtx op0 = XEXP (x, 0);
10068 *cost = COSTS_N_INSNS (1);
10070 if (GET_CODE (op0) == NEG)
10071 op0 = XEXP (op0, 0);
10073 if (speed_p)
10074 *cost += extra_cost->fp[mode != SFmode].mult;
10076 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10077 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10078 return true;
10080 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10082 *cost = LIBCALL_COST (2);
10083 return false;
10086 if (mode == SImode)
10088 *cost = COSTS_N_INSNS (1);
10089 if (TARGET_DSP_MULTIPLY
10090 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10091 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10092 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10093 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10094 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10095 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10096 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10097 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10098 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10099 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10100 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10101 && (INTVAL (XEXP (XEXP (x, 1), 1))
10102 == 16))))))
10104 /* SMUL[TB][TB]. */
10105 if (speed_p)
10106 *cost += extra_cost->mult[0].extend;
10107 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10108 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10109 return true;
10111 if (speed_p)
10112 *cost += extra_cost->mult[0].simple;
10113 return false;
10116 if (mode == DImode)
10118 if (arm_arch3m
10119 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10120 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10121 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10122 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10124 *cost = COSTS_N_INSNS (1);
10125 if (speed_p)
10126 *cost += extra_cost->mult[1].extend;
10127 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10128 ZERO_EXTEND, 0, speed_p)
10129 + rtx_cost (XEXP (XEXP (x, 1), 0),
10130 ZERO_EXTEND, 0, speed_p));
10131 return true;
10134 *cost = LIBCALL_COST (2);
10135 return false;
10138 /* Vector mode? */
10139 *cost = LIBCALL_COST (2);
10140 return false;
10142 case NEG:
10143 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10144 && (mode == SFmode || !TARGET_VFP_SINGLE))
10146 *cost = COSTS_N_INSNS (1);
10147 if (speed_p)
10148 *cost += extra_cost->fp[mode != SFmode].neg;
10150 return false;
10152 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10154 *cost = LIBCALL_COST (1);
10155 return false;
10158 if (mode == SImode)
10160 if (GET_CODE (XEXP (x, 0)) == ABS)
10162 *cost = COSTS_N_INSNS (2);
10163 /* Assume the non-flag-changing variant. */
10164 if (speed_p)
10165 *cost += (extra_cost->alu.log_shift
10166 + extra_cost->alu.arith_shift);
10167 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10168 return true;
10171 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10172 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10174 *cost = COSTS_N_INSNS (2);
10175 /* No extra cost for MOV imm and MVN imm. */
10176 /* If the comparison op is using the flags, there's no further
10177 cost; otherwise we need to add the cost of the comparison. */
10178 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10179 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10180 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10182 *cost += (COSTS_N_INSNS (1)
10183 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10184 speed_p)
10185 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10186 speed_p));
10187 if (speed_p)
10188 *cost += extra_cost->alu.arith;
10190 return true;
10192 *cost = COSTS_N_INSNS (1);
10193 if (speed_p)
10194 *cost += extra_cost->alu.arith;
10195 return false;
10198 if (GET_MODE_CLASS (mode) == MODE_INT
10199 && GET_MODE_SIZE (mode) < 4)
10201 /* Slightly disparage, as we might need an extend operation. */
10202 *cost = 1 + COSTS_N_INSNS (1);
10203 if (speed_p)
10204 *cost += extra_cost->alu.arith;
10205 return false;
10208 if (mode == DImode)
10210 *cost = COSTS_N_INSNS (2);
10211 if (speed_p)
10212 *cost += 2 * extra_cost->alu.arith;
10213 return false;
10216 /* Vector mode? */
10217 *cost = LIBCALL_COST (1);
10218 return false;
10220 case NOT:
10221 if (mode == SImode)
10223 rtx shift_op;
10224 rtx shift_reg = NULL;
10226 *cost = COSTS_N_INSNS (1);
10227 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10229 if (shift_op)
10231 if (shift_reg != NULL)
10233 if (speed_p)
10234 *cost += extra_cost->alu.log_shift_reg;
10235 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10237 else if (speed_p)
10238 *cost += extra_cost->alu.log_shift;
10239 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10240 return true;
10243 if (speed_p)
10244 *cost += extra_cost->alu.logical;
10245 return false;
10247 if (mode == DImode)
10249 *cost = COSTS_N_INSNS (2);
10250 return false;
10253 /* Vector mode? */
10255 *cost += LIBCALL_COST (1);
10256 return false;
10258 case IF_THEN_ELSE:
10260 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10262 *cost = COSTS_N_INSNS (4);
10263 return true;
10265 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10266 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10268 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10269 /* Assume that if one arm of the if_then_else is a register,
10270 that it will be tied with the result and eliminate the
10271 conditional insn. */
10272 if (REG_P (XEXP (x, 1)))
10273 *cost += op2cost;
10274 else if (REG_P (XEXP (x, 2)))
10275 *cost += op1cost;
10276 else
10278 if (speed_p)
10280 if (extra_cost->alu.non_exec_costs_exec)
10281 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10282 else
10283 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10285 else
10286 *cost += op1cost + op2cost;
10289 return true;
10291 case COMPARE:
10292 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10293 *cost = 0;
10294 else
10296 machine_mode op0mode;
10297 /* We'll mostly assume that the cost of a compare is the cost of the
10298 LHS. However, there are some notable exceptions. */
10300 /* Floating point compares are never done as side-effects. */
10301 op0mode = GET_MODE (XEXP (x, 0));
10302 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10303 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10305 *cost = COSTS_N_INSNS (1);
10306 if (speed_p)
10307 *cost += extra_cost->fp[op0mode != SFmode].compare;
10309 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10311 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10312 return true;
10315 return false;
10317 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10319 *cost = LIBCALL_COST (2);
10320 return false;
10323 /* DImode compares normally take two insns. */
10324 if (op0mode == DImode)
10326 *cost = COSTS_N_INSNS (2);
10327 if (speed_p)
10328 *cost += 2 * extra_cost->alu.arith;
10329 return false;
10332 if (op0mode == SImode)
10334 rtx shift_op;
10335 rtx shift_reg;
10337 if (XEXP (x, 1) == const0_rtx
10338 && !(REG_P (XEXP (x, 0))
10339 || (GET_CODE (XEXP (x, 0)) == SUBREG
10340 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10342 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10344 /* Multiply operations that set the flags are often
10345 significantly more expensive. */
10346 if (speed_p
10347 && GET_CODE (XEXP (x, 0)) == MULT
10348 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10349 *cost += extra_cost->mult[0].flag_setting;
10351 if (speed_p
10352 && GET_CODE (XEXP (x, 0)) == PLUS
10353 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10354 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10355 0), 1), mode))
10356 *cost += extra_cost->mult[0].flag_setting;
10357 return true;
10360 shift_reg = NULL;
10361 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10362 if (shift_op != NULL)
10364 *cost = COSTS_N_INSNS (1);
10365 if (shift_reg != NULL)
10367 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10368 if (speed_p)
10369 *cost += extra_cost->alu.arith_shift_reg;
10371 else if (speed_p)
10372 *cost += extra_cost->alu.arith_shift;
10373 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10374 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10375 return true;
10378 *cost = COSTS_N_INSNS (1);
10379 if (speed_p)
10380 *cost += extra_cost->alu.arith;
10381 if (CONST_INT_P (XEXP (x, 1))
10382 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10384 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10385 return true;
10387 return false;
10390 /* Vector mode? */
10392 *cost = LIBCALL_COST (2);
10393 return false;
10395 return true;
10397 case EQ:
10398 case NE:
10399 case LT:
10400 case LE:
10401 case GT:
10402 case GE:
10403 case LTU:
10404 case LEU:
10405 case GEU:
10406 case GTU:
10407 case ORDERED:
10408 case UNORDERED:
10409 case UNEQ:
10410 case UNLE:
10411 case UNLT:
10412 case UNGE:
10413 case UNGT:
10414 case LTGT:
10415 if (outer_code == SET)
10417 /* Is it a store-flag operation? */
10418 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10419 && XEXP (x, 1) == const0_rtx)
10421 /* Thumb also needs an IT insn. */
10422 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10423 return true;
10425 if (XEXP (x, 1) == const0_rtx)
10427 switch (code)
10429 case LT:
10430 /* LSR Rd, Rn, #31. */
10431 *cost = COSTS_N_INSNS (1);
10432 if (speed_p)
10433 *cost += extra_cost->alu.shift;
10434 break;
10436 case EQ:
10437 /* RSBS T1, Rn, #0
10438 ADC Rd, Rn, T1. */
10440 case NE:
10441 /* SUBS T1, Rn, #1
10442 SBC Rd, Rn, T1. */
10443 *cost = COSTS_N_INSNS (2);
10444 break;
10446 case LE:
10447 /* RSBS T1, Rn, Rn, LSR #31
10448 ADC Rd, Rn, T1. */
10449 *cost = COSTS_N_INSNS (2);
10450 if (speed_p)
10451 *cost += extra_cost->alu.arith_shift;
10452 break;
10454 case GT:
10455 /* RSB Rd, Rn, Rn, ASR #1
10456 LSR Rd, Rd, #31. */
10457 *cost = COSTS_N_INSNS (2);
10458 if (speed_p)
10459 *cost += (extra_cost->alu.arith_shift
10460 + extra_cost->alu.shift);
10461 break;
10463 case GE:
10464 /* ASR Rd, Rn, #31
10465 ADD Rd, Rn, #1. */
10466 *cost = COSTS_N_INSNS (2);
10467 if (speed_p)
10468 *cost += extra_cost->alu.shift;
10469 break;
10471 default:
10472 /* Remaining cases are either meaningless or would take
10473 three insns anyway. */
10474 *cost = COSTS_N_INSNS (3);
10475 break;
10477 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10478 return true;
10480 else
10482 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10483 if (CONST_INT_P (XEXP (x, 1))
10484 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10486 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10487 return true;
10490 return false;
10493 /* Not directly inside a set. If it involves the condition code
10494 register it must be the condition for a branch, cond_exec or
10495 I_T_E operation. Since the comparison is performed elsewhere
10496 this is just the control part which has no additional
10497 cost. */
10498 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10499 && XEXP (x, 1) == const0_rtx)
10501 *cost = 0;
10502 return true;
10504 return false;
10506 case ABS:
10507 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10508 && (mode == SFmode || !TARGET_VFP_SINGLE))
10510 *cost = COSTS_N_INSNS (1);
10511 if (speed_p)
10512 *cost += extra_cost->fp[mode != SFmode].neg;
10514 return false;
10516 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10518 *cost = LIBCALL_COST (1);
10519 return false;
10522 if (mode == SImode)
10524 *cost = COSTS_N_INSNS (1);
10525 if (speed_p)
10526 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10527 return false;
10529 /* Vector mode? */
10530 *cost = LIBCALL_COST (1);
10531 return false;
10533 case SIGN_EXTEND:
10534 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10535 && MEM_P (XEXP (x, 0)))
10537 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10539 if (mode == DImode)
10540 *cost += COSTS_N_INSNS (1);
10542 if (!speed_p)
10543 return true;
10545 if (GET_MODE (XEXP (x, 0)) == SImode)
10546 *cost += extra_cost->ldst.load;
10547 else
10548 *cost += extra_cost->ldst.load_sign_extend;
10550 if (mode == DImode)
10551 *cost += extra_cost->alu.shift;
10553 return true;
10556 /* Widening from less than 32-bits requires an extend operation. */
10557 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10559 /* We have SXTB/SXTH. */
10560 *cost = COSTS_N_INSNS (1);
10561 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10562 if (speed_p)
10563 *cost += extra_cost->alu.extend;
10565 else if (GET_MODE (XEXP (x, 0)) != SImode)
10567 /* Needs two shifts. */
10568 *cost = COSTS_N_INSNS (2);
10569 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10570 if (speed_p)
10571 *cost += 2 * extra_cost->alu.shift;
10574 /* Widening beyond 32-bits requires one more insn. */
10575 if (mode == DImode)
10577 *cost += COSTS_N_INSNS (1);
10578 if (speed_p)
10579 *cost += extra_cost->alu.shift;
10582 return true;
10584 case ZERO_EXTEND:
10585 if ((arm_arch4
10586 || GET_MODE (XEXP (x, 0)) == SImode
10587 || GET_MODE (XEXP (x, 0)) == QImode)
10588 && MEM_P (XEXP (x, 0)))
10590 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10592 if (mode == DImode)
10593 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10595 return true;
10598 /* Widening from less than 32-bits requires an extend operation. */
10599 if (GET_MODE (XEXP (x, 0)) == QImode)
10601 /* UXTB can be a shorter instruction in Thumb2, but it might
10602 be slower than the AND Rd, Rn, #255 alternative. When
10603 optimizing for speed it should never be slower to use
10604 AND, and we don't really model 16-bit vs 32-bit insns
10605 here. */
10606 *cost = COSTS_N_INSNS (1);
10607 if (speed_p)
10608 *cost += extra_cost->alu.logical;
10610 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10612 /* We have UXTB/UXTH. */
10613 *cost = COSTS_N_INSNS (1);
10614 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10615 if (speed_p)
10616 *cost += extra_cost->alu.extend;
10618 else if (GET_MODE (XEXP (x, 0)) != SImode)
10620 /* Needs two shifts. It's marginally preferable to use
10621 shifts rather than two BIC instructions as the second
10622 shift may merge with a subsequent insn as a shifter
10623 op. */
10624 *cost = COSTS_N_INSNS (2);
10625 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10626 if (speed_p)
10627 *cost += 2 * extra_cost->alu.shift;
10629 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10630 *cost = COSTS_N_INSNS (1);
10632 /* Widening beyond 32-bits requires one more insn. */
10633 if (mode == DImode)
10635 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10638 return true;
10640 case CONST_INT:
10641 *cost = 0;
10642 /* CONST_INT has no mode, so we cannot tell for sure how many
10643 insns are really going to be needed. The best we can do is
10644 look at the value passed. If it fits in SImode, then assume
10645 that's the mode it will be used for. Otherwise assume it
10646 will be used in DImode. */
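 /* E.g. the value 0x1ffffffff does not fit in SImode, so it is costed
    below as two SImode constants: the low word (0xffffffff) and the
    high word (0x1).  */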
10647 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10648 mode = SImode;
10649 else
10650 mode = DImode;
10652 /* Avoid blowing up in arm_gen_constant (). */
10653 if (!(outer_code == PLUS
10654 || outer_code == AND
10655 || outer_code == IOR
10656 || outer_code == XOR
10657 || outer_code == MINUS))
10658 outer_code = SET;
10660 const_int_cost:
10661 if (mode == SImode)
10663 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10664 INTVAL (x), NULL, NULL,
10665 0, 0));
10666 /* Extra costs? */
10668 else
10670 *cost += COSTS_N_INSNS (arm_gen_constant
10671 (outer_code, SImode, NULL,
10672 trunc_int_for_mode (INTVAL (x), SImode),
10673 NULL, NULL, 0, 0)
10674 + arm_gen_constant (outer_code, SImode, NULL,
10675 INTVAL (x) >> 32, NULL,
10676 NULL, 0, 0));
10677 /* Extra costs? */
10680 return true;
10682 case CONST:
10683 case LABEL_REF:
10684 case SYMBOL_REF:
10685 if (speed_p)
10687 if (arm_arch_thumb2 && !flag_pic)
10688 *cost = COSTS_N_INSNS (2);
10689 else
10690 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10692 else
10693 *cost = COSTS_N_INSNS (2);
10695 if (flag_pic)
10697 *cost += COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->alu.arith;
10702 return true;
10704 case CONST_FIXED:
10705 *cost = COSTS_N_INSNS (4);
10706 /* Fixme. */
10707 return true;
10709 case CONST_DOUBLE:
10710 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10711 && (mode == SFmode || !TARGET_VFP_SINGLE))
10713 if (vfp3_const_double_rtx (x))
10715 *cost = COSTS_N_INSNS (1);
10716 if (speed_p)
10717 *cost += extra_cost->fp[mode == DFmode].fpconst;
10718 return true;
10721 if (speed_p)
10723 *cost = COSTS_N_INSNS (1);
10724 if (mode == DFmode)
10725 *cost += extra_cost->ldst.loadd;
10726 else
10727 *cost += extra_cost->ldst.loadf;
10729 else
10730 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10732 return true;
10734 *cost = COSTS_N_INSNS (4);
10735 return true;
10737 case CONST_VECTOR:
10738 /* Fixme. */
10739 if (TARGET_NEON
10740 && TARGET_HARD_FLOAT
10741 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10742 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10743 *cost = COSTS_N_INSNS (1);
10744 else
10745 *cost = COSTS_N_INSNS (4);
10746 return true;
10748 case HIGH:
10749 case LO_SUM:
10750 *cost = COSTS_N_INSNS (1);
10751 /* When optimizing for size, we prefer constant pool entries to
10752 MOVW/MOVT pairs, so bump the cost of these slightly. */
10753 if (!speed_p)
10754 *cost += 1;
10755 return true;
10757 case CLZ:
10758 *cost = COSTS_N_INSNS (1);
10759 if (speed_p)
10760 *cost += extra_cost->alu.clz;
10761 return false;
10763 case SMIN:
10764 if (XEXP (x, 1) == const0_rtx)
10766 *cost = COSTS_N_INSNS (1);
10767 if (speed_p)
10768 *cost += extra_cost->alu.log_shift;
10769 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10770 return true;
10772 /* Fall through. */
10773 case SMAX:
10774 case UMIN:
10775 case UMAX:
10776 *cost = COSTS_N_INSNS (2);
10777 return false;
10779 case TRUNCATE:
10780 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10781 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10782 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10783 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10784 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10785 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10786 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10787 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10788 == ZERO_EXTEND))))
10790 *cost = COSTS_N_INSNS (1);
10791 if (speed_p)
10792 *cost += extra_cost->mult[1].extend;
10793 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10794 speed_p)
10795 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10796 0, speed_p));
10797 return true;
10799 *cost = LIBCALL_COST (1);
10800 return false;
10802 case UNSPEC:
10803 return arm_unspec_cost (x, outer_code, speed_p, cost);
10805 case PC:
10806 /* Reading the PC is like reading any other register. Writing it
10807 is more expensive, but we take that into account elsewhere. */
10808 *cost = 0;
10809 return true;
10811 case ZERO_EXTRACT:
10812 /* TODO: Simple zero_extract of bottom bits using AND. */
10813 /* Fall through. */
10814 case SIGN_EXTRACT:
10815 if (arm_arch6
10816 && mode == SImode
10817 && CONST_INT_P (XEXP (x, 1))
10818 && CONST_INT_P (XEXP (x, 2)))
10820 *cost = COSTS_N_INSNS (1);
10821 if (speed_p)
10822 *cost += extra_cost->alu.bfx;
10823 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10824 return true;
10826 /* Without UBFX/SBFX, need to resort to shift operations. */
10827 *cost = COSTS_N_INSNS (2);
10828 if (speed_p)
10829 *cost += 2 * extra_cost->alu.shift;
10830 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10831 return true;
10833 case FLOAT_EXTEND:
10834 if (TARGET_HARD_FLOAT)
10836 *cost = COSTS_N_INSNS (1);
10837 if (speed_p)
10838 *cost += extra_cost->fp[mode == DFmode].widen;
10839 if (!TARGET_FPU_ARMV8
10840 && GET_MODE (XEXP (x, 0)) == HFmode)
10842 /* Pre v8, widening HF->DF is a two-step process, first
10843 widening to SFmode. */
10844 *cost += COSTS_N_INSNS (1);
10845 if (speed_p)
10846 *cost += extra_cost->fp[0].widen;
10848 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10849 return true;
10852 *cost = LIBCALL_COST (1);
10853 return false;
10855 case FLOAT_TRUNCATE:
10856 if (TARGET_HARD_FLOAT)
10858 *cost = COSTS_N_INSNS (1);
10859 if (speed_p)
10860 *cost += extra_cost->fp[mode == DFmode].narrow;
10861 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10862 return true;
10863 /* Vector modes? */
10865 *cost = LIBCALL_COST (1);
10866 return false;
10868 case FMA:
10869 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10871 rtx op0 = XEXP (x, 0);
10872 rtx op1 = XEXP (x, 1);
10873 rtx op2 = XEXP (x, 2);
10875 *cost = COSTS_N_INSNS (1);
10877 /* vfms or vfnma. */
10878 if (GET_CODE (op0) == NEG)
10879 op0 = XEXP (op0, 0);
10881 /* vfnms or vfnma. */
10882 if (GET_CODE (op2) == NEG)
10883 op2 = XEXP (op2, 0);
10885 *cost += rtx_cost (op0, FMA, 0, speed_p);
10886 *cost += rtx_cost (op1, FMA, 1, speed_p);
10887 *cost += rtx_cost (op2, FMA, 2, speed_p);
10889 if (speed_p)
10890 *cost += extra_cost->fp[mode == DFmode].fma;
10892 return true;
10895 *cost = LIBCALL_COST (3);
10896 return false;
10898 case FIX:
10899 case UNSIGNED_FIX:
10900 if (TARGET_HARD_FLOAT)
10902 if (GET_MODE_CLASS (mode) == MODE_INT)
10904 *cost = COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10907 /* Strip off the 'cost' of rounding towards zero. */
10908 if (GET_CODE (XEXP (x, 0)) == FIX)
10909 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10910 else
10911 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10912 /* ??? Increase the cost to deal with transferring from
10913 FP -> CORE registers? */
10914 return true;
10916 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10917 && TARGET_FPU_ARMV8)
10919 *cost = COSTS_N_INSNS (1);
10920 if (speed_p)
10921 *cost += extra_cost->fp[mode == DFmode].roundint;
10922 return false;
10924 /* Vector costs? */
10926 *cost = LIBCALL_COST (1);
10927 return false;
10929 case FLOAT:
10930 case UNSIGNED_FLOAT:
10931 if (TARGET_HARD_FLOAT)
10933 /* ??? Increase the cost to deal with transferring from CORE
10934 -> FP registers? */
10935 *cost = COSTS_N_INSNS (1);
10936 if (speed_p)
10937 *cost += extra_cost->fp[mode == DFmode].fromint;
10938 return false;
10940 *cost = LIBCALL_COST (1);
10941 return false;
10943 case CALL:
10944 *cost = COSTS_N_INSNS (1);
10945 return true;
10947 case ASM_OPERANDS:
10949 /* Just a guess: assume the number of instructions in the asm
10950 plus one insn per input, with a minimum of COSTS_N_INSNS (1)
10951 (see PR60663). */
10952 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10953 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10955 *cost = COSTS_N_INSNS (asm_length + num_operands);
10956 return true;
10958 default:
10959 if (mode != VOIDmode)
10960 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10961 else
10962 *cost = COSTS_N_INSNS (4); /* Who knows? */
10963 return false;
10967 #undef HANDLE_NARROW_SHIFT_ARITH
10969 /* RTX costs.  Dispatch to the appropriate costing routine for the current
 tuning and for speed vs. size. */
10970 static bool
10971 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10972 int *total, bool speed)
10974 bool result;
10976 if (TARGET_OLD_RTX_COSTS
10977 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10979 /* Old way. (Deprecated.) */
10980 if (!speed)
10981 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10982 (enum rtx_code) outer_code, total);
10983 else
10984 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10985 (enum rtx_code) outer_code, total,
10986 speed);
10988 else
10990 /* New way. */
10991 if (current_tune->insn_extra_cost)
10992 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10993 (enum rtx_code) outer_code,
10994 current_tune->insn_extra_cost,
10995 total, speed);
10996 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10997 && current_tune->insn_extra_cost == NULL.  */
10998 else
10999 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11000 (enum rtx_code) outer_code,
11001 &generic_extra_costs, total, speed);
11004 if (dump_file && (dump_flags & TDF_DETAILS))
11006 print_rtl_single (dump_file, x);
11007 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11008 *total, result ? "final" : "partial");
11010 return result;
11013 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11014 supported on any "slowmul" cores, so it can be ignored. */
11016 static bool
11017 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11018 int *total, bool speed)
11020 machine_mode mode = GET_MODE (x);
11022 if (TARGET_THUMB)
11024 *total = thumb1_rtx_costs (x, code, outer_code);
11025 return true;
11028 switch (code)
11030 case MULT:
11031 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11032 || mode == DImode)
11034 *total = COSTS_N_INSNS (20);
11035 return false;
11038 if (CONST_INT_P (XEXP (x, 1)))
11040 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11041 & (unsigned HOST_WIDE_INT) 0xffffffff);
11042 int cost, const_ok = const_ok_for_arm (i);
11043 int j, booth_unit_size;
11045 /* Tune as appropriate. */
11046 cost = const_ok ? 4 : 8;
11047 booth_unit_size = 2;
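 /* Each iteration of the loop below accounts for one 2-bit Booth step;
    e.g. a constant with 16 significant bits takes 8 iterations, adding
    8 to the base cost chosen above.  */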
11048 for (j = 0; i && j < 32; j += booth_unit_size)
11050 i >>= booth_unit_size;
11051 cost++;
11054 *total = COSTS_N_INSNS (cost);
11055 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11056 return true;
11059 *total = COSTS_N_INSNS (20);
11060 return false;
11062 default:
11063 return arm_rtx_costs_1 (x, outer_code, total, speed);
11068 /* RTX cost for cores with a fast multiply unit (M variants). */
11070 static bool
11071 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11072 int *total, bool speed)
11074 machine_mode mode = GET_MODE (x);
11076 if (TARGET_THUMB1)
11078 *total = thumb1_rtx_costs (x, code, outer_code);
11079 return true;
11082 /* ??? Should Thumb-2 use different costs? */
11083 switch (code)
11085 case MULT:
11086 /* There is no point basing this on the tuning, since it is always the
11087 fast variant if it exists at all. */
11088 if (mode == DImode
11089 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11090 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11091 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11093 *total = COSTS_N_INSNS (2);
11094 return false;
11098 if (mode == DImode)
11100 *total = COSTS_N_INSNS (5);
11101 return false;
11104 if (CONST_INT_P (XEXP (x, 1)))
11106 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11107 & (unsigned HOST_WIDE_INT) 0xffffffff);
11108 int cost, const_ok = const_ok_for_arm (i);
11109 int j, booth_unit_size;
11111 /* Tune as appropriate. */
11112 cost = const_ok ? 4 : 8;
11113 booth_unit_size = 8;
11114 for (j = 0; i && j < 32; j += booth_unit_size)
11116 i >>= booth_unit_size;
11117 cost++;
11120 *total = COSTS_N_INSNS (cost);
11121 return false;
11124 if (mode == SImode)
11126 *total = COSTS_N_INSNS (4);
11127 return false;
11130 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11132 if (TARGET_HARD_FLOAT
11133 && (mode == SFmode
11134 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11136 *total = COSTS_N_INSNS (1);
11137 return false;
11141 /* Requires a lib call. */
11142 *total = COSTS_N_INSNS (20);
11143 return false;
11145 default:
11146 return arm_rtx_costs_1 (x, outer_code, total, speed);
11151 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11152 so it can be ignored. */
11154 static bool
11155 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11156 int *total, bool speed)
11158 machine_mode mode = GET_MODE (x);
11160 if (TARGET_THUMB)
11162 *total = thumb1_rtx_costs (x, code, outer_code);
11163 return true;
11166 switch (code)
11168 case COMPARE:
11169 if (GET_CODE (XEXP (x, 0)) != MULT)
11170 return arm_rtx_costs_1 (x, outer_code, total, speed);
11172 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11173 will stall until the multiplication is complete. */
11174 *total = COSTS_N_INSNS (3);
11175 return false;
11177 case MULT:
11178 /* There is no point basing this on the tuning, since it is always the
11179 fast variant if it exists at all. */
11180 if (mode == DImode
11181 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11182 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11183 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11185 *total = COSTS_N_INSNS (2);
11186 return false;
11190 if (mode == DImode)
11192 *total = COSTS_N_INSNS (5);
11193 return false;
11196 if (CONST_INT_P (XEXP (x, 1)))
11198 /* If operand 1 is a constant we can more accurately
11199 calculate the cost of the multiply. The multiplier can
11200 retire 15 bits on the first cycle and a further 12 on the
11201 second. We do, of course, have to load the constant into
11202 a register first. */
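 /* Worked example: multiplying by 0x10001 gives i & 0xffff8000 != 0
    (one extra cycle) and i & 0xf8000000 == 0, so the final cost below
    is COSTS_N_INSNS (2).  */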
11203 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11204 /* There's a general overhead of one cycle. */
11205 int cost = 1;
11206 unsigned HOST_WIDE_INT masked_const;
11208 if (i & 0x80000000)
11209 i = ~i;
11211 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11213 masked_const = i & 0xffff8000;
11214 if (masked_const != 0)
11216 cost++;
11217 masked_const = i & 0xf8000000;
11218 if (masked_const != 0)
11219 cost++;
11221 *total = COSTS_N_INSNS (cost);
11222 return false;
11225 if (mode == SImode)
11227 *total = COSTS_N_INSNS (3);
11228 return false;
11231 /* Requires a lib call. */
11232 *total = COSTS_N_INSNS (20);
11233 return false;
11235 default:
11236 return arm_rtx_costs_1 (x, outer_code, total, speed);
11241 /* RTX costs for 9e (and later) cores. */
11243 static bool
11244 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11245 int *total, bool speed)
11247 machine_mode mode = GET_MODE (x);
11249 if (TARGET_THUMB1)
11251 switch (code)
11253 case MULT:
11254 /* Small multiply: 32 cycles for an integer multiply inst. */
11255 if (arm_arch6m && arm_m_profile_small_mul)
11256 *total = COSTS_N_INSNS (32);
11257 else
11258 *total = COSTS_N_INSNS (3);
11259 return true;
11261 default:
11262 *total = thumb1_rtx_costs (x, code, outer_code);
11263 return true;
11267 switch (code)
11269 case MULT:
11270 /* There is no point basing this on the tuning, since it is always the
11271 fast variant if it exists at all. */
11272 if (mode == DImode
11273 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11274 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11275 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11277 *total = COSTS_N_INSNS (2);
11278 return false;
11282 if (mode == DImode)
11284 *total = COSTS_N_INSNS (5);
11285 return false;
11288 if (mode == SImode)
11290 *total = COSTS_N_INSNS (2);
11291 return false;
11294 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11296 if (TARGET_HARD_FLOAT
11297 && (mode == SFmode
11298 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11300 *total = COSTS_N_INSNS (1);
11301 return false;
11305 *total = COSTS_N_INSNS (20);
11306 return false;
11308 default:
11309 return arm_rtx_costs_1 (x, outer_code, total, speed);
11312 /* All address computations that can be done are free, but rtx cost returns
11313 the same for practically all of them. So we weight the different types
11314 of address here in the order (most pref first):
11315 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
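/* Concretely, the weights returned below are 0 for auto-increment/decrement
   addresses, 10 for MEM, LABEL_REF or SYMBOL_REF, 2 for reg + const-int,
   3-4 for other sums, and 6 otherwise (a plain register).  */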
11316 static inline int
11317 arm_arm_address_cost (rtx x)
11319 enum rtx_code c = GET_CODE (x);
11321 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11322 return 0;
11323 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11324 return 10;
11326 if (c == PLUS)
11328 if (CONST_INT_P (XEXP (x, 1)))
11329 return 2;
11331 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11332 return 3;
11334 return 4;
11337 return 6;
11340 static inline int
11341 arm_thumb_address_cost (rtx x)
11343 enum rtx_code c = GET_CODE (x);
11345 if (c == REG)
11346 return 1;
11347 if (c == PLUS
11348 && REG_P (XEXP (x, 0))
11349 && CONST_INT_P (XEXP (x, 1)))
11350 return 1;
11352 return 2;
11355 static int
11356 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11357 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11359 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11362 /* Adjust cost hook for XScale. */
11363 static bool
11364 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11366 /* Some true dependencies can have a higher cost depending
11367 on precisely how certain input operands are used. */
11368 if (REG_NOTE_KIND(link) == 0
11369 && recog_memoized (insn) >= 0
11370 && recog_memoized (dep) >= 0)
11372 int shift_opnum = get_attr_shift (insn);
11373 enum attr_type attr_type = get_attr_type (dep);
11375 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11376 operand for INSN. If we have a shifted input operand and the
11377 instruction we depend on is another ALU instruction, then we may
11378 have to account for an additional stall. */
11379 if (shift_opnum != 0
11380 && (attr_type == TYPE_ALU_SHIFT_IMM
11381 || attr_type == TYPE_ALUS_SHIFT_IMM
11382 || attr_type == TYPE_LOGIC_SHIFT_IMM
11383 || attr_type == TYPE_LOGICS_SHIFT_IMM
11384 || attr_type == TYPE_ALU_SHIFT_REG
11385 || attr_type == TYPE_ALUS_SHIFT_REG
11386 || attr_type == TYPE_LOGIC_SHIFT_REG
11387 || attr_type == TYPE_LOGICS_SHIFT_REG
11388 || attr_type == TYPE_MOV_SHIFT
11389 || attr_type == TYPE_MVN_SHIFT
11390 || attr_type == TYPE_MOV_SHIFT_REG
11391 || attr_type == TYPE_MVN_SHIFT_REG))
11393 rtx shifted_operand;
11394 int opno;
11396 /* Get the shifted operand. */
11397 extract_insn (insn);
11398 shifted_operand = recog_data.operand[shift_opnum];
11400 /* Iterate over all the operands in DEP. If we write an operand
11401 that overlaps with SHIFTED_OPERAND, then we have to increase the
11402 cost of this dependency. */
11403 extract_insn (dep);
11404 preprocess_constraints (dep);
11405 for (opno = 0; opno < recog_data.n_operands; opno++)
11407 /* We can ignore strict inputs. */
11408 if (recog_data.operand_type[opno] == OP_IN)
11409 continue;
11411 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11412 shifted_operand))
11414 *cost = 2;
11415 return false;
11420 return true;
11423 /* Adjust cost hook for Cortex A9. */
11424 static bool
11425 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11427 switch (REG_NOTE_KIND (link))
11429 case REG_DEP_ANTI:
11430 *cost = 0;
11431 return false;
11433 case REG_DEP_TRUE:
11434 case REG_DEP_OUTPUT:
11435 if (recog_memoized (insn) >= 0
11436 && recog_memoized (dep) >= 0)
11438 if (GET_CODE (PATTERN (insn)) == SET)
11440 if (GET_MODE_CLASS
11441 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11442 || GET_MODE_CLASS
11443 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11445 enum attr_type attr_type_insn = get_attr_type (insn);
11446 enum attr_type attr_type_dep = get_attr_type (dep);
11448 /* By default all dependencies of the form
11449 s0 = s0 <op> s1
11450 s0 = s0 <op> s2
11451 have an extra latency of 1 cycle because
11452 of the input and output dependency in this
11453 case. However, this gets modeled as a true
11454 dependency and hence all these checks. */
11455 if (REG_P (SET_DEST (PATTERN (insn)))
11456 && REG_P (SET_DEST (PATTERN (dep)))
11457 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11458 SET_DEST (PATTERN (dep))))
11460 /* FMACS is a special case where the dependent
11461 instruction can be issued 3 cycles before
11462 the normal latency in case of an output
11463 dependency. */
11464 if ((attr_type_insn == TYPE_FMACS
11465 || attr_type_insn == TYPE_FMACD)
11466 && (attr_type_dep == TYPE_FMACS
11467 || attr_type_dep == TYPE_FMACD))
11469 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11470 *cost = insn_default_latency (dep) - 3;
11471 else
11472 *cost = insn_default_latency (dep);
11473 return false;
11475 else
11477 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11478 *cost = insn_default_latency (dep) + 1;
11479 else
11480 *cost = insn_default_latency (dep);
11482 return false;
11487 break;
11489 default:
11490 gcc_unreachable ();
11493 return true;
11496 /* Adjust cost hook for FA726TE. */
11497 static bool
11498 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11500 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11501 has a penalty of 3. */
11502 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11503 && recog_memoized (insn) >= 0
11504 && recog_memoized (dep) >= 0
11505 && get_attr_conds (dep) == CONDS_SET)
11507 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11508 if (get_attr_conds (insn) == CONDS_USE
11509 && get_attr_type (insn) != TYPE_BRANCH)
11511 *cost = 3;
11512 return false;
11515 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11516 || get_attr_conds (insn) == CONDS_USE)
11518 *cost = 0;
11519 return false;
11523 return true;
11526 /* Implement TARGET_REGISTER_MOVE_COST.
11528 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11529 it is typically more expensive than a single memory access. We set
11530 the cost to less than two memory accesses so that floating
11531 point to integer conversion does not go through memory. */
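/* For instance, since arm_memory_move_cost below returns 10 on 32-bit
   targets, the value 15 used for VFP<->core moves keeps such a move
   cheaper than the two memory accesses (cost 20) a spill and reload
   would need.  */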
11534 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11535 reg_class_t from, reg_class_t to)
11537 if (TARGET_32BIT)
11539 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11540 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11541 return 15;
11542 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11543 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11544 return 4;
11545 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11546 return 20;
11547 else
11548 return 2;
11550 else
11552 if (from == HI_REGS || to == HI_REGS)
11553 return 4;
11554 else
11555 return 2;
11559 /* Implement TARGET_MEMORY_MOVE_COST. */
11562 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11563 bool in ATTRIBUTE_UNUSED)
11565 if (TARGET_32BIT)
11566 return 10;
11567 else
11569 if (GET_MODE_SIZE (mode) < 4)
11570 return 8;
11571 else
11572 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11576 /* Vectorizer cost model implementation. */
11578 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11579 static int
11580 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11581 tree vectype,
11582 int misalign ATTRIBUTE_UNUSED)
11584 unsigned elements;
11586 switch (type_of_cost)
11588 case scalar_stmt:
11589 return current_tune->vec_costs->scalar_stmt_cost;
11591 case scalar_load:
11592 return current_tune->vec_costs->scalar_load_cost;
11594 case scalar_store:
11595 return current_tune->vec_costs->scalar_store_cost;
11597 case vector_stmt:
11598 return current_tune->vec_costs->vec_stmt_cost;
11600 case vector_load:
11601 return current_tune->vec_costs->vec_align_load_cost;
11603 case vector_store:
11604 return current_tune->vec_costs->vec_store_cost;
11606 case vec_to_scalar:
11607 return current_tune->vec_costs->vec_to_scalar_cost;
11609 case scalar_to_vec:
11610 return current_tune->vec_costs->scalar_to_vec_cost;
11612 case unaligned_load:
11613 return current_tune->vec_costs->vec_unalign_load_cost;
11615 case unaligned_store:
11616 return current_tune->vec_costs->vec_unalign_store_cost;
11618 case cond_branch_taken:
11619 return current_tune->vec_costs->cond_taken_branch_cost;
11621 case cond_branch_not_taken:
11622 return current_tune->vec_costs->cond_not_taken_branch_cost;
11624 case vec_perm:
11625 case vec_promote_demote:
11626 return current_tune->vec_costs->vec_stmt_cost;
11628 case vec_construct:
11629 elements = TYPE_VECTOR_SUBPARTS (vectype);
11630 return elements / 2 + 1;
11632 default:
11633 gcc_unreachable ();
11637 /* Implement targetm.vectorize.add_stmt_cost. */
11639 static unsigned
11640 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11641 struct _stmt_vec_info *stmt_info, int misalign,
11642 enum vect_cost_model_location where)
11644 unsigned *cost = (unsigned *) data;
11645 unsigned retval = 0;
11647 if (flag_vect_cost_model)
11649 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11650 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11652 /* Statements in an inner loop relative to the loop being
11653 vectorized are weighted more heavily. The value here is
11654 arbitrary and could potentially be improved with analysis. */
11655 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11656 count *= 50; /* FIXME. */
11658 retval = (unsigned) (count * stmt_cost);
11659 cost[where] += retval;
11662 return retval;
11665 /* Return true if and only if this insn can dual-issue only as older. */
11666 static bool
11667 cortexa7_older_only (rtx_insn *insn)
11669 if (recog_memoized (insn) < 0)
11670 return false;
11672 switch (get_attr_type (insn))
11674 case TYPE_ALU_DSP_REG:
11675 case TYPE_ALU_SREG:
11676 case TYPE_ALUS_SREG:
11677 case TYPE_LOGIC_REG:
11678 case TYPE_LOGICS_REG:
11679 case TYPE_ADC_REG:
11680 case TYPE_ADCS_REG:
11681 case TYPE_ADR:
11682 case TYPE_BFM:
11683 case TYPE_REV:
11684 case TYPE_MVN_REG:
11685 case TYPE_SHIFT_IMM:
11686 case TYPE_SHIFT_REG:
11687 case TYPE_LOAD_BYTE:
11688 case TYPE_LOAD1:
11689 case TYPE_STORE1:
11690 case TYPE_FFARITHS:
11691 case TYPE_FADDS:
11692 case TYPE_FFARITHD:
11693 case TYPE_FADDD:
11694 case TYPE_FMOV:
11695 case TYPE_F_CVT:
11696 case TYPE_FCMPS:
11697 case TYPE_FCMPD:
11698 case TYPE_FCONSTS:
11699 case TYPE_FCONSTD:
11700 case TYPE_FMULS:
11701 case TYPE_FMACS:
11702 case TYPE_FMULD:
11703 case TYPE_FMACD:
11704 case TYPE_FDIVS:
11705 case TYPE_FDIVD:
11706 case TYPE_F_MRC:
11707 case TYPE_F_MRRC:
11708 case TYPE_F_FLAG:
11709 case TYPE_F_LOADS:
11710 case TYPE_F_STORES:
11711 return true;
11712 default:
11713 return false;
11717 /* Return true if and only if this insn can dual-issue as younger. */
11718 static bool
11719 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11721 if (recog_memoized (insn) < 0)
11723 if (verbose > 5)
11724 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11725 return false;
11728 switch (get_attr_type (insn))
11730 case TYPE_ALU_IMM:
11731 case TYPE_ALUS_IMM:
11732 case TYPE_LOGIC_IMM:
11733 case TYPE_LOGICS_IMM:
11734 case TYPE_EXTEND:
11735 case TYPE_MVN_IMM:
11736 case TYPE_MOV_IMM:
11737 case TYPE_MOV_REG:
11738 case TYPE_MOV_SHIFT:
11739 case TYPE_MOV_SHIFT_REG:
11740 case TYPE_BRANCH:
11741 case TYPE_CALL:
11742 return true;
11743 default:
11744 return false;
11749 /* Look for an instruction that can dual issue only as an older
11750 instruction, and move it in front of any instructions that can
11751 dual-issue as younger, while preserving the relative order of all
11752 other instructions in the ready list. This is a heuristic to help
11753 dual-issue in later cycles, by postponing issue of more flexible
11754 instructions. This heuristic may affect dual issue opportunities
11755 in the current cycle. */
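/* For example, if the ready list in issue order (head first) is
   {Y1, O, Y2}, where O can dual-issue only as older and Y1/Y2 can issue
   as younger, then O is moved in front of Y1 so that it is issued first,
   while Y1 and Y2 keep their relative order.  */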
11756 static void
11757 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11758 int *n_readyp, int clock)
11760 int i;
11761 int first_older_only = -1, first_younger = -1;
11763 if (verbose > 5)
11764 fprintf (file,
11765 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11766 clock,
11767 *n_readyp);
11769 /* Traverse the ready list from the head (the instruction to issue
11770 first), looking for the first instruction that can issue as
11771 younger and the first instruction that can dual-issue only as
11772 older. */
11773 for (i = *n_readyp - 1; i >= 0; i--)
11775 rtx_insn *insn = ready[i];
11776 if (cortexa7_older_only (insn))
11778 first_older_only = i;
11779 if (verbose > 5)
11780 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11781 break;
11783 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11784 first_younger = i;
11787 /* Nothing to reorder because either no younger insn found or insn
11788 that can dual-issue only as older appears before any insn that
11789 can dual-issue as younger. */
11790 if (first_younger == -1)
11792 if (verbose > 5)
11793 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11794 return;
11797 /* Nothing to reorder because no older-only insn in the ready list. */
11798 if (first_older_only == -1)
11800 if (verbose > 5)
11801 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11802 return;
11805 /* Move first_older_only insn before first_younger. */
11806 if (verbose > 5)
11807 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11808 INSN_UID(ready [first_older_only]),
11809 INSN_UID(ready [first_younger]));
11810 rtx_insn *first_older_only_insn = ready [first_older_only];
11811 for (i = first_older_only; i < first_younger; i++)
11813 ready[i] = ready[i+1];
11816 ready[i] = first_older_only_insn;
11817 return;
11820 /* Implement TARGET_SCHED_REORDER. */
11821 static int
11822 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11823 int clock)
11825 switch (arm_tune)
11827 case cortexa7:
11828 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11829 break;
11830 default:
11831 /* Do nothing for other cores. */
11832 break;
11835 return arm_issue_rate ();
11838 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11839 It corrects the value of COST based on the relationship between
11840 INSN and DEP through the dependence LINK. It returns the new
11841 value. There is a per-core adjust_cost hook to adjust scheduler costs
11842 and the per-core hook can choose to completely override the generic
11843 adjust_cost function. Only put bits of code into arm_adjust_cost that
11844 are common across all cores. */
11845 static int
11846 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11848 rtx i_pat, d_pat;
11850 /* When generating Thumb-1 code, we want to place flag-setting operations
11851 close to a conditional branch which depends on them, so that we can
11852 omit the comparison. */
11853 if (TARGET_THUMB1
11854 && REG_NOTE_KIND (link) == 0
11855 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11856 && recog_memoized (dep) >= 0
11857 && get_attr_conds (dep) == CONDS_SET)
11858 return 0;
11860 if (current_tune->sched_adjust_cost != NULL)
11862 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11863 return cost;
11866 /* XXX Is this strictly true? */
11867 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11868 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11869 return 0;
11871 /* Call insns don't incur a stall, even if they follow a load. */
11872 if (REG_NOTE_KIND (link) == 0
11873 && CALL_P (insn))
11874 return 1;
11876 if ((i_pat = single_set (insn)) != NULL
11877 && MEM_P (SET_SRC (i_pat))
11878 && (d_pat = single_set (dep)) != NULL
11879 && MEM_P (SET_DEST (d_pat)))
11881 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11882 /* This is a load after a store; there is no conflict if the load reads
11883 from a cached area. Assume that loads from the stack, and from the
11884 constant pool are cached, and that others will miss. This is a
11885 hack. */
11887 if ((GET_CODE (src_mem) == SYMBOL_REF
11888 && CONSTANT_POOL_ADDRESS_P (src_mem))
11889 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11890 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11891 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11892 return 1;
11895 return cost;
11899 arm_max_conditional_execute (void)
11901 return max_insns_skipped;
11904 static int
11905 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11907 if (TARGET_32BIT)
11908 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11909 else
11910 return (optimize > 0) ? 2 : 0;
11913 static int
11914 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11916 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11919 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11920 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11921 sequences of non-executed instructions in IT blocks probably take the same
11922 amount of time as executed instructions (and the IT instruction itself takes
11923 space in icache). This function was experimentally determined to give good
11924 results on a popular embedded benchmark. */
11926 static int
11927 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11929 return (TARGET_32BIT && speed_p) ? 1
11930 : arm_default_branch_cost (speed_p, predictable_p);
11933 static int
11934 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11936 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11939 static bool fp_consts_inited = false;
11941 static REAL_VALUE_TYPE value_fp0;
11943 static void
11944 init_fp_table (void)
11946 REAL_VALUE_TYPE r;
11948 r = REAL_VALUE_ATOF ("0", DFmode);
11949 value_fp0 = r;
11950 fp_consts_inited = true;
11953 /* Return TRUE if rtx X is a valid immediate FP constant. */
11955 arm_const_double_rtx (rtx x)
11957 REAL_VALUE_TYPE r;
11959 if (!fp_consts_inited)
11960 init_fp_table ();
11962 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11963 if (REAL_VALUE_MINUS_ZERO (r))
11964 return 0;
11966 if (REAL_VALUES_EQUAL (r, value_fp0))
11967 return 1;
11969 return 0;
11972 /* VFPv3 has a fairly wide range of representable immediates, formed from
11973 "quarter-precision" floating-point values. These can be evaluated using this
11974 formula (with ^ for exponentiation):
11976 -1^s * n * 2^-r
11978 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11979 16 <= n <= 31 and 0 <= r <= 7.
11981 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11983 - A (most-significant) is the sign bit.
11984 - BCD are the exponent (encoded as r XOR 3).
11985 - EFGH are the mantissa (encoded as n - 16).
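/* As an illustrative sketch only (guarded out because it is not part of
   GCC itself): packing the fields above gives the index computed by
   vfp3_const_double_index below.  For example 1.0 = -1^0 * 16 * 2^-4,
   i.e. s = 0, n = 16, r = 4, which packs to 0x70.  */
#if 0
static int
vfp3_quarter_precision_index_example (int s, int n, int r)
{
  /* Assumes 16 <= n <= 31 and 0 <= r <= 7, as described above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif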
11988 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11989 fconst[sd] instruction, or -1 if X isn't suitable. */
11990 static int
11991 vfp3_const_double_index (rtx x)
11993 REAL_VALUE_TYPE r, m;
11994 int sign, exponent;
11995 unsigned HOST_WIDE_INT mantissa, mant_hi;
11996 unsigned HOST_WIDE_INT mask;
11997 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11998 bool fail;
12000 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12001 return -1;
12003 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12005 /* We can't represent these things, so detect them first. */
12006 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12007 return -1;
12009 /* Extract sign, exponent and mantissa. */
12010 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12011 r = real_value_abs (&r);
12012 exponent = REAL_EXP (&r);
12013 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12014 highest (sign) bit, with a fixed binary point at bit point_pos.
12015 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12016 bits for the mantissa, this may fail (low bits would be lost). */
12017 real_ldexp (&m, &r, point_pos - exponent);
12018 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12019 mantissa = w.elt (0);
12020 mant_hi = w.elt (1);
12022 /* If there are bits set in the low part of the mantissa, we can't
12023 represent this value. */
12024 if (mantissa != 0)
12025 return -1;
12027 /* Now make it so that mantissa contains the most-significant bits, and move
12028 the point_pos to indicate that the least-significant bits have been
12029 discarded. */
12030 point_pos -= HOST_BITS_PER_WIDE_INT;
12031 mantissa = mant_hi;
12033 /* We can permit four significant bits of mantissa only, plus a high bit
12034 which is always 1. */
12035 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12036 if ((mantissa & mask) != 0)
12037 return -1;
12039 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12040 mantissa >>= point_pos - 5;
12042 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12043 floating-point immediate zero with Neon using an integer-zero load, but
12044 that case is handled elsewhere.) */
12045 if (mantissa == 0)
12046 return -1;
12048 gcc_assert (mantissa >= 16 && mantissa <= 31);
12050 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12051 normalized significands are in the range [1, 2). (Our mantissa is shifted
12052 left 4 places at this point relative to normalized IEEE754 values). GCC
12053 internally uses [0.5, 1) (see real.c), so the exponent returned from
12054 REAL_EXP must be altered. */
12055 exponent = 5 - exponent;
12057 if (exponent < 0 || exponent > 7)
12058 return -1;
12060 /* Sign, mantissa and exponent are now in the correct form to plug into the
12061 formula described in the comment above. */
12062 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12065 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12067 vfp3_const_double_rtx (rtx x)
12069 if (!TARGET_VFP3)
12070 return 0;
12072 return vfp3_const_double_index (x) != -1;
12075 /* Recognize immediates which can be used in various Neon instructions. Legal
12076 immediates are described by the following table (for VMVN variants, the
12077 bitwise inverse of the constant shown is recognized. In either case, VMOV
12078 is output and the correct instruction to use for a given constant is chosen
12079 by the assembler). The constant shown is replicated across all elements of
12080 the destination vector.
12082 insn elems variant constant (binary)
12083 ---- ----- ------- -----------------
12084 vmov i32 0 00000000 00000000 00000000 abcdefgh
12085 vmov i32 1 00000000 00000000 abcdefgh 00000000
12086 vmov i32 2 00000000 abcdefgh 00000000 00000000
12087 vmov i32 3 abcdefgh 00000000 00000000 00000000
12088 vmov i16 4 00000000 abcdefgh
12089 vmov i16 5 abcdefgh 00000000
12090 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12091 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12092 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12093 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12094 vmvn i16 10 00000000 abcdefgh
12095 vmvn i16 11 abcdefgh 00000000
12096 vmov i32 12 00000000 00000000 abcdefgh 11111111
12097 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12098 vmov i32 14 00000000 abcdefgh 11111111 11111111
12099 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12100 vmov i8 16 abcdefgh
12101 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12102 eeeeeeee ffffffff gggggggg hhhhhhhh
12103 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12104 vmov f32 19 00000000 00000000 00000000 00000000
12106 For case 18, B = !b. Representable values are exactly those accepted by
12107 vfp3_const_double_index, but are output as floating-point numbers rather
12108 than indices.
12110 For case 19, we will change it to vmov.i32 when assembling.
12112 Variants 0-5 (inclusive) may also be used as immediates for the second
12113 operand of VORR/VBIC instructions.
12115 The INVERSE argument causes the bitwise inverse of the given operand to be
12116 recognized instead (used for recognizing legal immediates for the VAND/VORN
12117 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12118 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12119 output, rather than the real insns vbic/vorr).
12121 INVERSE makes no difference to the recognition of float vectors.
12123 The return value is the variant of immediate as shown in the above table, or
12124 -1 if the given value doesn't match any of the listed patterns.
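/* For instance, a vector whose 32-bit elements all equal 0x000000ab
   matches variant 0 (vmov.i32 with immediate 0xab), elements equal to
   0x0000abff match variant 12, and a vector whose bytes are all equal
   matches variant 16 (vmov.i8).  */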
12126 static int
12127 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12128 rtx *modconst, int *elementwidth)
12130 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12131 matches = 1; \
12132 for (i = 0; i < idx; i += (STRIDE)) \
12133 if (!(TEST)) \
12134 matches = 0; \
12135 if (matches) \
12137 immtype = (CLASS); \
12138 elsize = (ELSIZE); \
12139 break; \
12142 unsigned int i, elsize = 0, idx = 0, n_elts;
12143 unsigned int innersize;
12144 unsigned char bytes[16];
12145 int immtype = -1, matches;
12146 unsigned int invmask = inverse ? 0xff : 0;
12147 bool vector = GET_CODE (op) == CONST_VECTOR;
12149 if (vector)
12151 n_elts = CONST_VECTOR_NUNITS (op);
12152 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12154 else
12156 n_elts = 1;
12157 if (mode == VOIDmode)
12158 mode = DImode;
12159 innersize = GET_MODE_SIZE (mode);
12162 /* Vectors of float constants. */
12163 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12165 rtx el0 = CONST_VECTOR_ELT (op, 0);
12166 REAL_VALUE_TYPE r0;
12168 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12169 return -1;
12171 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12173 for (i = 1; i < n_elts; i++)
12175 rtx elt = CONST_VECTOR_ELT (op, i);
12176 REAL_VALUE_TYPE re;
12178 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12180 if (!REAL_VALUES_EQUAL (r0, re))
12181 return -1;
12184 if (modconst)
12185 *modconst = CONST_VECTOR_ELT (op, 0);
12187 if (elementwidth)
12188 *elementwidth = 0;
12190 if (el0 == CONST0_RTX (GET_MODE (el0)))
12191 return 19;
12192 else
12193 return 18;
12196 /* Splat vector constant out into a byte vector. */
12197 for (i = 0; i < n_elts; i++)
12199 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12200 unsigned HOST_WIDE_INT elpart;
12201 unsigned int part, parts;
12203 if (CONST_INT_P (el))
12205 elpart = INTVAL (el);
12206 parts = 1;
12208 else if (CONST_DOUBLE_P (el))
12210 elpart = CONST_DOUBLE_LOW (el);
12211 parts = 2;
12213 else
12214 gcc_unreachable ();
12216 for (part = 0; part < parts; part++)
12218 unsigned int byte;
12219 for (byte = 0; byte < innersize; byte++)
12221 bytes[idx++] = (elpart & 0xff) ^ invmask;
12222 elpart >>= BITS_PER_UNIT;
12224 if (CONST_DOUBLE_P (el))
12225 elpart = CONST_DOUBLE_HIGH (el);
12229 /* Sanity check. */
12230 gcc_assert (idx == GET_MODE_SIZE (mode));
12234 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12235 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12237 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12238 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12240 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12241 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12243 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12244 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12246 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12248 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12250 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12251 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12253 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12254 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12256 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12257 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12259 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12260 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12262 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12264 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12266 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12267 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12269 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12270 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12272 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12273 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12275 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12276 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12278 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12280 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12281 && bytes[i] == bytes[(i + 8) % idx]);
12283 while (0);
12285 if (immtype == -1)
12286 return -1;
12288 if (elementwidth)
12289 *elementwidth = elsize;
12291 if (modconst)
12293 unsigned HOST_WIDE_INT imm = 0;
12295 /* Un-invert bytes of recognized vector, if necessary. */
12296 if (invmask != 0)
12297 for (i = 0; i < idx; i++)
12298 bytes[i] ^= invmask;
12300 if (immtype == 17)
12302 /* FIXME: Broken on 32-bit H_W_I hosts. */
12303 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12305 for (i = 0; i < 8; i++)
12306 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12307 << (i * BITS_PER_UNIT);
12309 *modconst = GEN_INT (imm);
12311 else
12313 unsigned HOST_WIDE_INT imm = 0;
12315 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12316 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12318 *modconst = GEN_INT (imm);
12322 return immtype;
12323 #undef CHECK
12326 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12327 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12328 float elements), and a modified constant (whatever should be output for a
12329 VMOV) in *MODCONST. */
12332 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12333 rtx *modconst, int *elementwidth)
12335 rtx tmpconst;
12336 int tmpwidth;
12337 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12339 if (retval == -1)
12340 return 0;
12342 if (modconst)
12343 *modconst = tmpconst;
12345 if (elementwidth)
12346 *elementwidth = tmpwidth;
12348 return 1;
12351 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12352 the immediate is valid, write a constant suitable for using as an operand
12353 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12354 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12357 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12358 rtx *modconst, int *elementwidth)
12360 rtx tmpconst;
12361 int tmpwidth;
12362 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12364 if (retval < 0 || retval > 5)
12365 return 0;
12367 if (modconst)
12368 *modconst = tmpconst;
12370 if (elementwidth)
12371 *elementwidth = tmpwidth;
12373 return 1;
12376 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12377 the immediate is valid, write a constant suitable for using as an operand
12378 to VSHR/VSHL to *MODCONST and the corresponding element width to
12379 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift,
12380 because left and right shifts have different limitations. */
12383 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12384 rtx *modconst, int *elementwidth,
12385 bool isleftshift)
12387 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12388 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12389 unsigned HOST_WIDE_INT last_elt = 0;
12390 unsigned HOST_WIDE_INT maxshift;
12392 /* Split vector constant out into a byte vector. */
12393 for (i = 0; i < n_elts; i++)
12395 rtx el = CONST_VECTOR_ELT (op, i);
12396 unsigned HOST_WIDE_INT elpart;
12398 if (CONST_INT_P (el))
12399 elpart = INTVAL (el);
12400 else if (CONST_DOUBLE_P (el))
12401 return 0;
12402 else
12403 gcc_unreachable ();
12405 if (i != 0 && elpart != last_elt)
12406 return 0;
12408 last_elt = elpart;
12411 /* Shift less than element size. */
12412 maxshift = innersize * 8;
12414 if (isleftshift)
12416 /* Left shift immediate value can be from 0 to <size>-1. */
12417 if (last_elt >= maxshift)
12418 return 0;
12420 else
12422 /* Right shift immediate value can be from 1 to <size>. */
12423 if (last_elt == 0 || last_elt > maxshift)
12424 return 0;
12427 if (elementwidth)
12428 *elementwidth = innersize * 8;
12430 if (modconst)
12431 *modconst = CONST_VECTOR_ELT (op, 0);
12433 return 1;
12436 /* Return a string suitable for output of Neon immediate logic operation
12437 MNEM. */
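/* For example, MNEM "vorr" with a 32-bit element width and QUAD set
   produces the template "vorr.i32\t%q0, %2".  */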
12439 char *
12440 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12441 int inverse, int quad)
12443 int width, is_valid;
12444 static char templ[40];
12446 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12448 gcc_assert (is_valid != 0);
12450 if (quad)
12451 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12452 else
12453 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12455 return templ;
12458 /* Return a string suitable for output of Neon immediate shift operation
12459 (VSHR or VSHL) MNEM. */
12461 char *
12462 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12463 machine_mode mode, int quad,
12464 bool isleftshift)
12466 int width, is_valid;
12467 static char templ[40];
12469 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12470 gcc_assert (is_valid != 0);
12472 if (quad)
12473 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12474 else
12475 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12477 return templ;
12480 /* Output a sequence of pairwise operations to implement a reduction.
12481 NOTE: We do "too much work" here, because pairwise operations work on two
12482 registers' worth of operands in one go. Unfortunately it does not seem
12483 possible to exploit those extra calculations to do the full operation in fewer steps.
12484 Although all vector elements of the result but the first are ignored, we
12485 actually calculate the same result in each of the elements. An alternative
12486 such as initially loading a vector with zero to use as each of the second
12487 operands would use up an additional register and take an extra instruction,
12488 for no particular gain. */
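/* For example, with a pairwise-add REDUC on a four-element vector
   {a, b, c, d}, the first step yields {a+b, c+d, a+b, c+d} and the
   second {a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d}; the caller only uses
   element 0 of the final result.  */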
12490 void
12491 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12492 rtx (*reduc) (rtx, rtx, rtx))
12494 machine_mode inner = GET_MODE_INNER (mode);
12495 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12496 rtx tmpsum = op1;
12498 for (i = parts / 2; i >= 1; i /= 2)
12500 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12501 emit_insn (reduc (dest, tmpsum, tmpsum));
12502 tmpsum = dest;
12506 /* If VALS is a vector constant that can be loaded into a register
12507 using VDUP, generate instructions to do so and return an RTX to
12508 assign to the register. Otherwise return NULL_RTX. */
12510 static rtx
12511 neon_vdup_constant (rtx vals)
12513 machine_mode mode = GET_MODE (vals);
12514 machine_mode inner_mode = GET_MODE_INNER (mode);
12515 int n_elts = GET_MODE_NUNITS (mode);
12516 bool all_same = true;
12517 rtx x;
12518 int i;
12520 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12521 return NULL_RTX;
12523 for (i = 0; i < n_elts; ++i)
12525 x = XVECEXP (vals, 0, i);
12526 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12527 all_same = false;
12530 if (!all_same)
12531 /* The elements are not all the same. We could handle repeating
12532 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12533 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12534 vdup.i16). */
12535 return NULL_RTX;
12537 /* We can load this constant by using VDUP and a constant in a
12538 single ARM register. This will be cheaper than a vector
12539 load. */
12541 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12542 return gen_rtx_VEC_DUPLICATE (mode, x);
12545 /* Generate code to load VALS, which is a PARALLEL containing only
12546 constants (for vec_init) or CONST_VECTOR, efficiently into a
12547 register. Returns an RTX to copy into the register, or NULL_RTX
12548 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12551 neon_make_constant (rtx vals)
12553 machine_mode mode = GET_MODE (vals);
12554 rtx target;
12555 rtx const_vec = NULL_RTX;
12556 int n_elts = GET_MODE_NUNITS (mode);
12557 int n_const = 0;
12558 int i;
12560 if (GET_CODE (vals) == CONST_VECTOR)
12561 const_vec = vals;
12562 else if (GET_CODE (vals) == PARALLEL)
12564 /* A CONST_VECTOR must contain only CONST_INTs and
12565 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12566 Only store valid constants in a CONST_VECTOR. */
12567 for (i = 0; i < n_elts; ++i)
12569 rtx x = XVECEXP (vals, 0, i);
12570 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12571 n_const++;
12573 if (n_const == n_elts)
12574 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12576 else
12577 gcc_unreachable ();
12579 if (const_vec != NULL
12580 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12581 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12582 return const_vec;
12583 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12584 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12585 pipeline cycle; creating the constant takes one or two ARM
12586 pipeline cycles. */
12587 return target;
12588 else if (const_vec != NULL_RTX)
12589 /* Load from constant pool. On Cortex-A8 this takes two cycles
12590 (for either double or quad vectors). We cannot take advantage
12591 of single-cycle VLD1 because we need a PC-relative addressing
12592 mode. */
12593 return const_vec;
12594 else
12595 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12596 We cannot construct an initializer. */
12597 return NULL_RTX;
12600 /* Initialize vector TARGET to VALS. */
12602 void
12603 neon_expand_vector_init (rtx target, rtx vals)
12605 machine_mode mode = GET_MODE (target);
12606 machine_mode inner_mode = GET_MODE_INNER (mode);
12607 int n_elts = GET_MODE_NUNITS (mode);
12608 int n_var = 0, one_var = -1;
12609 bool all_same = true;
12610 rtx x, mem;
12611 int i;
12613 for (i = 0; i < n_elts; ++i)
12615 x = XVECEXP (vals, 0, i);
12616 if (!CONSTANT_P (x))
12617 ++n_var, one_var = i;
12619 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12620 all_same = false;
12623 if (n_var == 0)
12625 rtx constant = neon_make_constant (vals);
12626 if (constant != NULL_RTX)
12628 emit_move_insn (target, constant);
12629 return;
12633 /* Splat a single non-constant element if we can. */
12634 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12636 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12637 emit_insn (gen_rtx_SET (VOIDmode, target,
12638 gen_rtx_VEC_DUPLICATE (mode, x)));
12639 return;
12642 /* One field is non-constant. Load constant then overwrite varying
12643 field. This is more efficient than using the stack. */
12644 if (n_var == 1)
12646 rtx copy = copy_rtx (vals);
12647 rtx index = GEN_INT (one_var);
12649 /* Load constant part of vector, substitute neighboring value for
12650 varying element. */
12651 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12652 neon_expand_vector_init (target, copy);
12654 /* Insert variable. */
12655 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12656 switch (mode)
12658 case V8QImode:
12659 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12660 break;
12661 case V16QImode:
12662 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12663 break;
12664 case V4HImode:
12665 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12666 break;
12667 case V8HImode:
12668 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12669 break;
12670 case V2SImode:
12671 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12672 break;
12673 case V4SImode:
12674 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12675 break;
12676 case V2SFmode:
12677 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12678 break;
12679 case V4SFmode:
12680 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12681 break;
12682 case V2DImode:
12683 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12684 break;
12685 default:
12686 gcc_unreachable ();
12688 return;
12691 /* Construct the vector in memory one field at a time
12692 and load the whole vector. */
12693 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12694 for (i = 0; i < n_elts; i++)
12695 emit_move_insn (adjust_address_nv (mem, inner_mode,
12696 i * GET_MODE_SIZE (inner_mode)),
12697 XVECEXP (vals, 0, i));
12698 emit_move_insn (target, mem);
12701 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12702 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12703 reported source locations are bogus. */
12705 static void
12706 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12707 const char *err)
12709 HOST_WIDE_INT lane;
12711 gcc_assert (CONST_INT_P (operand));
12713 lane = INTVAL (operand);
12715 if (lane < low || lane >= high)
12716 error (err);
12719 /* Bounds-check lanes. */
12721 void
12722 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12724 bounds_check (operand, low, high, "lane out of range");
12727 /* Bounds-check constants. */
12729 void
12730 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12732 bounds_check (operand, low, high, "constant out of range");
12735 HOST_WIDE_INT
12736 neon_element_bits (machine_mode mode)
12738 if (mode == DImode)
12739 return GET_MODE_BITSIZE (mode);
12740 else
12741 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12745 /* Predicates for `match_operand' and `match_operator'. */
12747 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12748 WB is true if full writeback address modes are allowed and is false
12749 if limited writeback address modes (POST_INC and PRE_DEC) are
12750 allowed. */
12753 arm_coproc_mem_operand (rtx op, bool wb)
12755 rtx ind;
12757 /* Reject eliminable registers. */
12758 if (! (reload_in_progress || reload_completed || lra_in_progress)
12759 && ( reg_mentioned_p (frame_pointer_rtx, op)
12760 || reg_mentioned_p (arg_pointer_rtx, op)
12761 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12762 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12763 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12764 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12765 return FALSE;
12767 /* Constants are converted into offsets from labels. */
12768 if (!MEM_P (op))
12769 return FALSE;
12771 ind = XEXP (op, 0);
12773 if (reload_completed
12774 && (GET_CODE (ind) == LABEL_REF
12775 || (GET_CODE (ind) == CONST
12776 && GET_CODE (XEXP (ind, 0)) == PLUS
12777 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12778 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12779 return TRUE;
12781 /* Match: (mem (reg)). */
12782 if (REG_P (ind))
12783 return arm_address_register_rtx_p (ind, 0);
12785 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12786 acceptable in any case (subject to verification by
12787 arm_address_register_rtx_p). We need WB to be true to accept
12788 PRE_INC and POST_DEC. */
12789 if (GET_CODE (ind) == POST_INC
12790 || GET_CODE (ind) == PRE_DEC
12791 || (wb
12792 && (GET_CODE (ind) == PRE_INC
12793 || GET_CODE (ind) == POST_DEC)))
12794 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12796 if (wb
12797 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12798 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12799 && GET_CODE (XEXP (ind, 1)) == PLUS
12800 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12801 ind = XEXP (ind, 1);
12803 /* Match:
12804 (plus (reg)
12805 (const)). */
12806 if (GET_CODE (ind) == PLUS
12807 && REG_P (XEXP (ind, 0))
12808 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12809 && CONST_INT_P (XEXP (ind, 1))
12810 && INTVAL (XEXP (ind, 1)) > -1024
12811 && INTVAL (XEXP (ind, 1)) < 1024
12812 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12813 return TRUE;
12815 return FALSE;
12818 /* Return TRUE if OP is a memory operand which we can load or store a vector
12819 to/from. TYPE is one of the following values:
12820 0 - Vector load/store (vldr)
12821 1 - Core registers (ldm)
12822 2 - Element/structure loads (vld1)
12825 neon_vector_mem_operand (rtx op, int type, bool strict)
12827 rtx ind;
12829 /* Reject eliminable registers. */
12830 if (! (reload_in_progress || reload_completed)
12831 && ( reg_mentioned_p (frame_pointer_rtx, op)
12832 || reg_mentioned_p (arg_pointer_rtx, op)
12833 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12834 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12835 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12836 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12837 return !strict;
12839 /* Constants are converted into offsets from labels. */
12840 if (!MEM_P (op))
12841 return FALSE;
12843 ind = XEXP (op, 0);
12845 if (reload_completed
12846 && (GET_CODE (ind) == LABEL_REF
12847 || (GET_CODE (ind) == CONST
12848 && GET_CODE (XEXP (ind, 0)) == PLUS
12849 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12850 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12851 return TRUE;
12853 /* Match: (mem (reg)). */
12854 if (REG_P (ind))
12855 return arm_address_register_rtx_p (ind, 0);
12857 /* Allow post-increment with Neon registers. */
12858 if ((type != 1 && GET_CODE (ind) == POST_INC)
12859 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12860 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12862 /* Allow post-increment by register for VLDn */
12863 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12864 && GET_CODE (XEXP (ind, 1)) == PLUS
12865 && REG_P (XEXP (XEXP (ind, 1), 1)))
12866 return true;
12868 /* Match:
12869 (plus (reg)
12870 (const)). */
12871 if (type == 0
12872 && GET_CODE (ind) == PLUS
12873 && REG_P (XEXP (ind, 0))
12874 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12875 && CONST_INT_P (XEXP (ind, 1))
12876 && INTVAL (XEXP (ind, 1)) > -1024
12877 /* For quad modes, we restrict the constant offset to be slightly less
12878 than what the instruction format permits. We have no such constraint
12879 on double mode offsets. (This must match arm_legitimate_index_p.) */
12880 && (INTVAL (XEXP (ind, 1))
12881 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12882 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12883 return TRUE;
12885 return FALSE;
12888 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12889 type. */
12891 neon_struct_mem_operand (rtx op)
12893 rtx ind;
12895 /* Reject eliminable registers. */
12896 if (! (reload_in_progress || reload_completed)
12897 && ( reg_mentioned_p (frame_pointer_rtx, op)
12898 || reg_mentioned_p (arg_pointer_rtx, op)
12899 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12900 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12901 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12902 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12903 return FALSE;
12905 /* Constants are converted into offsets from labels. */
12906 if (!MEM_P (op))
12907 return FALSE;
12909 ind = XEXP (op, 0);
12911 if (reload_completed
12912 && (GET_CODE (ind) == LABEL_REF
12913 || (GET_CODE (ind) == CONST
12914 && GET_CODE (XEXP (ind, 0)) == PLUS
12915 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12916 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12917 return TRUE;
12919 /* Match: (mem (reg)). */
12920 if (REG_P (ind))
12921 return arm_address_register_rtx_p (ind, 0);
12923 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12924 if (GET_CODE (ind) == POST_INC
12925 || GET_CODE (ind) == PRE_DEC)
12926 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12928 return FALSE;
12931 /* Return true if X is a register that will be eliminated later on. */
12933 arm_eliminable_register (rtx x)
12935 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12936 || REGNO (x) == ARG_POINTER_REGNUM
12937 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12938 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12941 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12942 coprocessor registers. Otherwise return NO_REGS. */
12944 enum reg_class
12945 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12947 if (mode == HFmode)
12949 if (!TARGET_NEON_FP16)
12950 return GENERAL_REGS;
12951 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12952 return NO_REGS;
12953 return GENERAL_REGS;
12956 /* The neon move patterns handle all legitimate vector and struct
12957 addresses. */
12958 if (TARGET_NEON
12959 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12960 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12961 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12962 || VALID_NEON_STRUCT_MODE (mode)))
12963 return NO_REGS;
12965 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12966 return NO_REGS;
12968 return GENERAL_REGS;
12971 /* Values which must be returned in the most-significant end of the return
12972 register. */
12974 static bool
12975 arm_return_in_msb (const_tree valtype)
12977 return (TARGET_AAPCS_BASED
12978 && BYTES_BIG_ENDIAN
12979 && (AGGREGATE_TYPE_P (valtype)
12980 || TREE_CODE (valtype) == COMPLEX_TYPE
12981 || FIXED_POINT_TYPE_P (valtype)));
12984 /* Return TRUE if X references a SYMBOL_REF. */
12986 symbol_mentioned_p (rtx x)
12988 const char * fmt;
12989 int i;
12991 if (GET_CODE (x) == SYMBOL_REF)
12992 return 1;
12994 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12995 are constant offsets, not symbols. */
12996 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12997 return 0;
12999 fmt = GET_RTX_FORMAT (GET_CODE (x));
13001 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13003 if (fmt[i] == 'E')
13005 int j;
13007 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13008 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13009 return 1;
13011 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13012 return 1;
13015 return 0;
13018 /* Return TRUE if X references a LABEL_REF. */
13020 label_mentioned_p (rtx x)
13022 const char * fmt;
13023 int i;
13025 if (GET_CODE (x) == LABEL_REF)
13026 return 1;
13028 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13029 instruction, but they are constant offsets, not symbols. */
13030 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13031 return 0;
13033 fmt = GET_RTX_FORMAT (GET_CODE (x));
13034 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13036 if (fmt[i] == 'E')
13038 int j;
13040 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13041 if (label_mentioned_p (XVECEXP (x, i, j)))
13042 return 1;
13044 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13045 return 1;
13048 return 0;
13052 tls_mentioned_p (rtx x)
13054 switch (GET_CODE (x))
13056 case CONST:
13057 return tls_mentioned_p (XEXP (x, 0));
13059 case UNSPEC:
13060 if (XINT (x, 1) == UNSPEC_TLS)
13061 return 1;
13063 default:
13064 return 0;
13068 /* Must not copy any rtx that uses a pc-relative address. */
13070 static bool
13071 arm_cannot_copy_insn_p (rtx_insn *insn)
13073 /* The tls call insn cannot be copied, as it is paired with a data
13074 word. */
13075 if (recog_memoized (insn) == CODE_FOR_tlscall)
13076 return true;
13078 subrtx_iterator::array_type array;
13079 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13081 const_rtx x = *iter;
13082 if (GET_CODE (x) == UNSPEC
13083 && (XINT (x, 1) == UNSPEC_PIC_BASE
13084 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13085 return true;
13087 return false;
13090 enum rtx_code
13091 minmax_code (rtx x)
13093 enum rtx_code code = GET_CODE (x);
13095 switch (code)
13097 case SMAX:
13098 return GE;
13099 case SMIN:
13100 return LE;
13101 case UMIN:
13102 return LEU;
13103 case UMAX:
13104 return GEU;
13105 default:
13106 gcc_unreachable ();
13110 /* Match pair of min/max operators that can be implemented via usat/ssat. */
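/* For example, bounds [0, 255] match USAT with *MASK set to 8, while
   bounds [-128, 127] match SSAT with *MASK set to 8.  */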
13112 bool
13113 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13114 int *mask, bool *signed_sat)
13116 /* The high bound must be a power of two minus one. */
13117 int log = exact_log2 (INTVAL (hi_bound) + 1);
13118 if (log == -1)
13119 return false;
13121 /* The low bound is either zero (for usat) or one less than the
13122 negation of the high bound (for ssat). */
13123 if (INTVAL (lo_bound) == 0)
13125 if (mask)
13126 *mask = log;
13127 if (signed_sat)
13128 *signed_sat = false;
13130 return true;
13133 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13135 if (mask)
13136 *mask = log + 1;
13137 if (signed_sat)
13138 *signed_sat = true;
13140 return true;
13143 return false;
13146 /* Return 1 if memory locations are adjacent. */
13148 adjacent_mem_locations (rtx a, rtx b)
13150 /* We don't guarantee to preserve the order of these memory refs. */
13151 if (volatile_refs_p (a) || volatile_refs_p (b))
13152 return 0;
13154 if ((REG_P (XEXP (a, 0))
13155 || (GET_CODE (XEXP (a, 0)) == PLUS
13156 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13157 && (REG_P (XEXP (b, 0))
13158 || (GET_CODE (XEXP (b, 0)) == PLUS
13159 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13161 HOST_WIDE_INT val0 = 0, val1 = 0;
13162 rtx reg0, reg1;
13163 int val_diff;
13165 if (GET_CODE (XEXP (a, 0)) == PLUS)
13167 reg0 = XEXP (XEXP (a, 0), 0);
13168 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13170 else
13171 reg0 = XEXP (a, 0);
13173 if (GET_CODE (XEXP (b, 0)) == PLUS)
13175 reg1 = XEXP (XEXP (b, 0), 0);
13176 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13178 else
13179 reg1 = XEXP (b, 0);
13181 /* Don't accept any offset that will require multiple
13182 instructions to handle, since this would cause the
13183 arith_adjacentmem pattern to output an overlong sequence. */
13184 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13185 return 0;
13187 /* Don't allow an eliminable register: register elimination can make
13188 the offset too large. */
13189 if (arm_eliminable_register (reg0))
13190 return 0;
13192 val_diff = val1 - val0;
13194 if (arm_ld_sched)
13196 /* If the target has load delay slots, then there's no benefit
13197 to using an ldm instruction unless the offset is zero and
13198 we are optimizing for size. */
13199 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13200 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13201 && (val_diff == 4 || val_diff == -4));
13204 return ((REGNO (reg0) == REGNO (reg1))
13205 && (val_diff == 4 || val_diff == -4));
13208 return 0;
13211 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13212 for load operations, false for store operations. CONSECUTIVE is true
13213 if the register numbers in the operation must be consecutive in the register
13214 bank. RETURN_PC is true if the value is to be loaded into PC.
13215 The pattern we are trying to match for load is:
13216 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13217 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13220 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13222 where
13223 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13224 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13225 3. If consecutive is TRUE, then for kth register being loaded,
13226 REGNO (R_dk) = REGNO (R_d0) + k.
13227 The pattern for store is similar. */
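/* For instance, a two-register SImode load with no write-back would be

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))])

   with the register numbers chosen purely for illustration.  */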
13228 bool
13229 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13230 bool consecutive, bool return_pc)
13232 HOST_WIDE_INT count = XVECLEN (op, 0);
13233 rtx reg, mem, addr;
13234 unsigned regno;
13235 unsigned first_regno;
13236 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13237 rtx elt;
13238 bool addr_reg_in_reglist = false;
13239 bool update = false;
13240 int reg_increment;
13241 int offset_adj;
13242 int regs_per_val;
13244 /* If not in SImode, then registers must be consecutive
13245 (e.g., VLDM instructions for DFmode). */
13246 gcc_assert ((mode == SImode) || consecutive);
13247 /* Setting return_pc for stores is illegal. */
13248 gcc_assert (!return_pc || load);
13250 /* Set up the increments and the regs per val based on the mode. */
13251 reg_increment = GET_MODE_SIZE (mode);
13252 regs_per_val = reg_increment / 4;
13253 offset_adj = return_pc ? 1 : 0;
13255 if (count <= 1
13256 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13257 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13258 return false;
13260 /* Check if this is a write-back. */
13261 elt = XVECEXP (op, 0, offset_adj);
13262 if (GET_CODE (SET_SRC (elt)) == PLUS)
13264 i++;
13265 base = 1;
13266 update = true;
13268 /* The offset adjustment must be the number of registers being
13269 popped times the size of a single register. */
13270 if (!REG_P (SET_DEST (elt))
13271 || !REG_P (XEXP (SET_SRC (elt), 0))
13272 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13273 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13274 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13275 ((count - 1 - offset_adj) * reg_increment))
13276 return false;
13279 i = i + offset_adj;
13280 base = base + offset_adj;
13281 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13282 success depends on the type: VLDM can do just one reg,
13283 LDM must do at least two. */
13284 if ((count <= i) && (mode == SImode))
13285 return false;
13287 elt = XVECEXP (op, 0, i - 1);
13288 if (GET_CODE (elt) != SET)
13289 return false;
13291 if (load)
13293 reg = SET_DEST (elt);
13294 mem = SET_SRC (elt);
13296 else
13298 reg = SET_SRC (elt);
13299 mem = SET_DEST (elt);
13302 if (!REG_P (reg) || !MEM_P (mem))
13303 return false;
13305 regno = REGNO (reg);
13306 first_regno = regno;
13307 addr = XEXP (mem, 0);
13308 if (GET_CODE (addr) == PLUS)
13310 if (!CONST_INT_P (XEXP (addr, 1)))
13311 return false;
13313 offset = INTVAL (XEXP (addr, 1));
13314 addr = XEXP (addr, 0);
13317 if (!REG_P (addr))
13318 return false;
13320 /* Don't allow SP to be loaded unless it is also the base register. It
13321 guarantees that SP is reset correctly when an LDM instruction
13322 is interrupted. Otherwise, we might end up with a corrupt stack. */
13323 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13324 return false;
13326 for (; i < count; i++)
13328 elt = XVECEXP (op, 0, i);
13329 if (GET_CODE (elt) != SET)
13330 return false;
13332 if (load)
13334 reg = SET_DEST (elt);
13335 mem = SET_SRC (elt);
13337 else
13339 reg = SET_SRC (elt);
13340 mem = SET_DEST (elt);
13343 if (!REG_P (reg)
13344 || GET_MODE (reg) != mode
13345 || REGNO (reg) <= regno
13346 || (consecutive
13347 && (REGNO (reg) !=
13348 (unsigned int) (first_regno + regs_per_val * (i - base))))
13349 /* Don't allow SP to be loaded unless it is also the base register. It
13350 guarantees that SP is reset correctly when an LDM instruction
13351 is interrupted. Otherwise, we might end up with a corrupt stack. */
13352 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13353 || !MEM_P (mem)
13354 || GET_MODE (mem) != mode
13355 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13356 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13357 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13358 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13359 offset + (i - base) * reg_increment))
13360 && (!REG_P (XEXP (mem, 0))
13361 || offset + (i - base) * reg_increment != 0)))
13362 return false;
13364 regno = REGNO (reg);
13365 if (regno == REGNO (addr))
13366 addr_reg_in_reglist = true;
13369 if (load)
13371 if (update && addr_reg_in_reglist)
13372 return false;
13374 /* For Thumb-1, address register is always modified - either by write-back
13375 or by explicit load. If the pattern does not describe an update,
13376 then the address register must be in the list of loaded registers. */
13377 if (TARGET_THUMB1)
13378 return update || addr_reg_in_reglist;
13381 return true;
13384 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13385 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13386 instruction. ADD_OFFSET is nonzero if the base address register needs
13387 to be modified with an add instruction before we can use it. */
13389 static bool
13390 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13391 int nops, HOST_WIDE_INT add_offset)
13393 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13394 if the offset isn't small enough. The reason 2 ldrs are faster
13395 is because these ARMs are able to do more than one cache access
13396 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13397 whilst the ARM8 has a double bandwidth cache. This means that
13398 these cores can do both an instruction fetch and a data fetch in
13399 a single cycle, so the trick of calculating the address into a
13400 scratch register (one of the result regs) and then doing a load
13401 multiple actually becomes slower (and no smaller in code size).
13402 That is the transformation
13404 ldr rd1, [rbase + offset]
13405 ldr rd2, [rbase + offset + 4]
13409 add rd1, rbase, offset
13410 ldmia rd1, {rd1, rd2}
13412 produces worse code -- '3 cycles + any stalls on rd2' instead of
13413 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13414 access per cycle, the first sequence could never complete in less
13415 than 6 cycles, whereas the ldm sequence would only take 5 and
13416 would make better use of sequential accesses if not hitting the
13417 cache.
13419 We cheat here and test 'arm_ld_sched' which we currently know to
13420 only be true for the ARM8, ARM9 and StrongARM. If this ever
13421 changes, then the test below needs to be reworked. */
13422 if (nops == 2 && arm_ld_sched && add_offset != 0)
13423 return false;
13425 /* XScale has load-store double instructions, but they have stricter
13426 alignment requirements than load-store multiple, so we cannot
13427 use them.
13429 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13430 the pipeline until completion.
13432 NREGS CYCLES
1 3
2 4
3 5
4 6
13438 An ldr instruction takes 1-3 cycles, but does not block the
13439 pipeline.
13441 NREGS CYCLES
13442 1 1-3
13443 2 2-6
13444 3 3-9
13445 4 4-12
13447 In the best case ldr will always win. However, the more ldr instructions
13448 we issue, the less likely we are to be able to schedule them well.
13449 Using ldr instructions also increases code size.
13451 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13452 for counts of 3 or 4 regs. */
13453 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13454 return false;
13455 return true;
13458 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13459 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13460 an array ORDER which describes the sequence to use when accessing the
13461 offsets that produces an ascending order. In this sequence, each
13462 offset must be larger by exactly 4 than the previous one. ORDER[0]
13463 must have been filled in with the lowest offset by the caller.
13464 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13465 we use to verify that ORDER produces an ascending order of registers.
13466 Return true if it was possible to construct such an order, false if
13467 not. */
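/* For example, given UNSORTED_OFFSETS {4, 12, 8, 0} and ORDER[0]
   pre-set to 3 (the index of offset 0), the function fills ORDER with
   {3, 0, 2, 1}.  */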
13469 static bool
13470 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13471 int *unsorted_regs)
13473 int i;
13474 for (i = 1; i < nops; i++)
13476 int j;
13478 order[i] = order[i - 1];
13479 for (j = 0; j < nops; j++)
13480 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13482 /* We must find exactly one offset that is higher than the
13483 previous one by 4. */
13484 if (order[i] != order[i - 1])
13485 return false;
13486 order[i] = j;
13488 if (order[i] == order[i - 1])
13489 return false;
13490 /* The register numbers must be ascending. */
13491 if (unsorted_regs != NULL
13492 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13493 return false;
13495 return true;
13498 /* Used to determine in a peephole whether a sequence of load
13499 instructions can be changed into a load-multiple instruction.
13500 NOPS is the number of separate load instructions we are examining. The
13501 first NOPS entries in OPERANDS are the destination registers, the
13502 next NOPS entries are memory operands. If this function is
13503 successful, *BASE is set to the common base register of the memory
13504 accesses; *LOAD_OFFSET is set to the first memory location's offset
13505 from that base register.
13506 REGS is an array filled in with the destination register numbers.
13507 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13508 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13509 the sequence of registers in REGS matches the loads from ascending memory
13510 locations, and the function verifies that the register numbers are
13511 themselves ascending. If CHECK_REGS is false, the register numbers
13512 are stored in the order they are found in the operands. */
13513 static int
13514 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13515 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13517 int unsorted_regs[MAX_LDM_STM_OPS];
13518 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13519 int order[MAX_LDM_STM_OPS];
13520 rtx base_reg_rtx = NULL;
13521 int base_reg = -1;
13522 int i, ldm_case;
13524 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13525 easily extended if required. */
13526 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13528 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13530 /* Loop over the operands and check that the memory references are
13531 suitable (i.e. immediate offsets from the same base register). At
13532 the same time, extract the target register, and the memory
13533 offsets. */
13534 for (i = 0; i < nops; i++)
13536 rtx reg;
13537 rtx offset;
13539 /* Convert a subreg of a mem into the mem itself. */
13540 if (GET_CODE (operands[nops + i]) == SUBREG)
13541 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13543 gcc_assert (MEM_P (operands[nops + i]));
13545 /* Don't reorder volatile memory references; it doesn't seem worth
13546 looking for the case where the order is ok anyway. */
13547 if (MEM_VOLATILE_P (operands[nops + i]))
13548 return 0;
13550 offset = const0_rtx;
13552 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13553 || (GET_CODE (reg) == SUBREG
13554 && REG_P (reg = SUBREG_REG (reg))))
13555 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13556 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13557 || (GET_CODE (reg) == SUBREG
13558 && REG_P (reg = SUBREG_REG (reg))))
13559 && (CONST_INT_P (offset
13560 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13562 if (i == 0)
13564 base_reg = REGNO (reg);
13565 base_reg_rtx = reg;
13566 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13567 return 0;
13569 else if (base_reg != (int) REGNO (reg))
13570 /* Not addressed from the same base register. */
13571 return 0;
13573 unsorted_regs[i] = (REG_P (operands[i])
13574 ? REGNO (operands[i])
13575 : REGNO (SUBREG_REG (operands[i])));
13577 /* If it isn't an integer register, or if it overwrites the
13578 base register but isn't the last insn in the list, then
13579 we can't do this. */
13580 if (unsorted_regs[i] < 0
13581 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13582 || unsorted_regs[i] > 14
13583 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13584 return 0;
13586 /* Don't allow SP to be loaded unless it is also the base
13587 register.  This restriction guarantees that SP is reset correctly when
13588 an LDM instruction is interrupted. Otherwise, we might
13589 end up with a corrupt stack. */
13590 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13591 return 0;
13593 unsorted_offsets[i] = INTVAL (offset);
13594 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13595 order[0] = i;
13597 else
13598 /* Not a suitable memory address. */
13599 return 0;
13602 /* All the useful information has now been extracted from the
13603 operands into unsorted_regs and unsorted_offsets; additionally,
13604 order[0] has been set to the lowest offset in the list. Sort
13605 the offsets into order, verifying that they are adjacent, and
13606 check that the register numbers are ascending. */
13607 if (!compute_offset_order (nops, unsorted_offsets, order,
13608 check_regs ? unsorted_regs : NULL))
13609 return 0;
13611 if (saved_order)
13612 memcpy (saved_order, order, sizeof order);
13614 if (base)
13616 *base = base_reg;
13618 for (i = 0; i < nops; i++)
13619 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13621 *load_offset = unsorted_offsets[order[0]];
13624 if (TARGET_THUMB1
13625 && !peep2_reg_dead_p (nops, base_reg_rtx))
13626 return 0;
13628 if (unsorted_offsets[order[0]] == 0)
13629 ldm_case = 1; /* ldmia */
13630 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13631 ldm_case = 2; /* ldmib */
13632 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13633 ldm_case = 3; /* ldmda */
13634 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13635 ldm_case = 4; /* ldmdb */
13636 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13637 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13638 ldm_case = 5;
13639 else
13640 return 0;
13642 if (!multiple_operation_profitable_p (false, nops,
13643 ldm_case == 5
13644 ? unsorted_offsets[order[0]] : 0))
13645 return 0;
13647 return ldm_case;
13650 /* Used to determine in a peephole whether a sequence of store instructions can
13651 be changed into a store-multiple instruction.
13652 NOPS is the number of separate store instructions we are examining.
13653 NOPS_TOTAL is the total number of instructions recognized by the peephole
13654 pattern.
13655 The first NOPS entries in OPERANDS are the source registers, the next
13656 NOPS entries are memory operands. If this function is successful, *BASE is
13657 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13658 to the first memory location's offset from that base register. REGS is an
13659 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13660 likewise filled with the corresponding rtx's.
13661 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13662 numbers to an ascending order of stores.
13663 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13664 from ascending memory locations, and the function verifies that the register
13665 numbers are themselves ascending. If CHECK_REGS is false, the register
13666 numbers are stored in the order they are found in the operands. */
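/* For illustration, a window such as

       str r0, [r4]
       str r1, [r4, #4]

   is accepted with *BASE = 4 (r4), *LOAD_OFFSET = 0 and REGS = {0, 1},
   so the pair can be rewritten as "stmia r4, {r0, r1}".  The register
   numbers here are only an example.  */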
13667 static int
13668 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13669 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13670 HOST_WIDE_INT *load_offset, bool check_regs)
13672 int unsorted_regs[MAX_LDM_STM_OPS];
13673 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13674 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13675 int order[MAX_LDM_STM_OPS];
13676 int base_reg = -1;
13677 rtx base_reg_rtx = NULL;
13678 int i, stm_case;
13680 /* Write back of base register is currently only supported for Thumb 1. */
13681 int base_writeback = TARGET_THUMB1;
13683 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13684 easily extended if required. */
13685 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13687 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13689 /* Loop over the operands and check that the memory references are
13690 suitable (i.e. immediate offsets from the same base register). At
13691 the same time, extract the target register, and the memory
13692 offsets. */
13693 for (i = 0; i < nops; i++)
13695 rtx reg;
13696 rtx offset;
13698 /* Convert a subreg of a mem into the mem itself. */
13699 if (GET_CODE (operands[nops + i]) == SUBREG)
13700 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13702 gcc_assert (MEM_P (operands[nops + i]));
13704 /* Don't reorder volatile memory references; it doesn't seem worth
13705 looking for the case where the order is ok anyway. */
13706 if (MEM_VOLATILE_P (operands[nops + i]))
13707 return 0;
13709 offset = const0_rtx;
13711 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13712 || (GET_CODE (reg) == SUBREG
13713 && REG_P (reg = SUBREG_REG (reg))))
13714 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13715 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13716 || (GET_CODE (reg) == SUBREG
13717 && REG_P (reg = SUBREG_REG (reg))))
13718 && (CONST_INT_P (offset
13719 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13721 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13722 ? operands[i] : SUBREG_REG (operands[i]));
13723 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13725 if (i == 0)
13727 base_reg = REGNO (reg);
13728 base_reg_rtx = reg;
13729 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13730 return 0;
13732 else if (base_reg != (int) REGNO (reg))
13733 /* Not addressed from the same base register. */
13734 return 0;
13736 /* If it isn't an integer register, then we can't do this. */
13737 if (unsorted_regs[i] < 0
13738 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13739 /* The effects are unpredictable if the base register is
13740 both updated and stored. */
13741 || (base_writeback && unsorted_regs[i] == base_reg)
13742 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13743 || unsorted_regs[i] > 14)
13744 return 0;
13746 unsorted_offsets[i] = INTVAL (offset);
13747 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13748 order[0] = i;
13750 else
13751 /* Not a suitable memory address. */
13752 return 0;
13755 /* All the useful information has now been extracted from the
13756 operands into unsorted_regs and unsorted_offsets; additionally,
13757 order[0] has been set to the lowest offset in the list. Sort
13758 the offsets into order, verifying that they are adjacent, and
13759 check that the register numbers are ascending. */
13760 if (!compute_offset_order (nops, unsorted_offsets, order,
13761 check_regs ? unsorted_regs : NULL))
13762 return 0;
13764 if (saved_order)
13765 memcpy (saved_order, order, sizeof order);
13767 if (base)
13769 *base = base_reg;
13771 for (i = 0; i < nops; i++)
13773 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13774 if (reg_rtxs)
13775 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13778 *load_offset = unsorted_offsets[order[0]];
13781 if (TARGET_THUMB1
13782 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13783 return 0;
13785 if (unsorted_offsets[order[0]] == 0)
13786 stm_case = 1; /* stmia */
13787 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13788 stm_case = 2; /* stmib */
13789 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13790 stm_case = 3; /* stmda */
13791 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13792 stm_case = 4; /* stmdb */
13793 else
13794 return 0;
13796 if (!multiple_operation_profitable_p (false, nops, 0))
13797 return 0;
13799 return stm_case;
13802 /* Routines for use in generating RTL. */
13804 /* Generate a load-multiple instruction. COUNT is the number of loads in
13805 the instruction; REGS and MEMS are arrays containing the operands.
13806 BASEREG is the base register to be used in addressing the memory operands.
13807 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13808 update the base register.  */
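/* As an illustration of the RTL produced below: for COUNT == 2,
   REGS = {0, 1} and WBACK_OFFSET == 8, the result is roughly

       (parallel
         [(set (reg basereg) (plus (reg basereg) (const_int 8)))
          (set (reg:SI 0) mems[0])
          (set (reg:SI 1) mems[1])])

   whereas a case deemed unprofitable is emitted as a plain sequence of
   individual moves instead.  */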
13810 static rtx
13811 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13812 HOST_WIDE_INT wback_offset)
13814 int i = 0, j;
13815 rtx result;
13817 if (!multiple_operation_profitable_p (false, count, 0))
13819 rtx seq;
13821 start_sequence ();
13823 for (i = 0; i < count; i++)
13824 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13826 if (wback_offset != 0)
13827 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13829 seq = get_insns ();
13830 end_sequence ();
13832 return seq;
13835 result = gen_rtx_PARALLEL (VOIDmode,
13836 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13837 if (wback_offset != 0)
13839 XVECEXP (result, 0, 0)
13840 = gen_rtx_SET (VOIDmode, basereg,
13841 plus_constant (Pmode, basereg, wback_offset));
13842 i = 1;
13843 count++;
13846 for (j = 0; i < count; i++, j++)
13847 XVECEXP (result, 0, i)
13848 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13850 return result;
13853 /* Generate a store-multiple instruction. COUNT is the number of stores in
13854 the instruction; REGS and MEMS are arrays containing the operands.
13855 BASEREG is the base register to be used in addressing the memory operands.
13856 WBACK_OFFSET, if nonzero, is the amount by which the instruction should
13857 update the base register.  */
13859 static rtx
13860 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13861 HOST_WIDE_INT wback_offset)
13863 int i = 0, j;
13864 rtx result;
13866 if (GET_CODE (basereg) == PLUS)
13867 basereg = XEXP (basereg, 0);
13869 if (!multiple_operation_profitable_p (false, count, 0))
13871 rtx seq;
13873 start_sequence ();
13875 for (i = 0; i < count; i++)
13876 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13878 if (wback_offset != 0)
13879 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13881 seq = get_insns ();
13882 end_sequence ();
13884 return seq;
13887 result = gen_rtx_PARALLEL (VOIDmode,
13888 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13889 if (wback_offset != 0)
13891 XVECEXP (result, 0, 0)
13892 = gen_rtx_SET (VOIDmode, basereg,
13893 plus_constant (Pmode, basereg, wback_offset));
13894 i = 1;
13895 count++;
13898 for (j = 0; i < count; i++, j++)
13899 XVECEXP (result, 0, i)
13900 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13902 return result;
13905 /* Generate either a load-multiple or a store-multiple instruction. This
13906 function can be used in situations where we can start with a single MEM
13907 rtx and adjust its address upwards.
13908 COUNT is the number of operations in the instruction, not counting a
13909 possible update of the base register. REGS is an array containing the
13910 register operands.
13911 BASEREG is the base register to be used in addressing the memory operands,
13912 which are constructed from BASEMEM.
13913 WRITE_BACK specifies whether the generated instruction should include an
13914 update of the base register.
13915 OFFSETP is used to pass an offset to and from this function; this offset
13916 is not used when constructing the address (instead BASEMEM should have an
13917 appropriate offset in its address), it is used only for setting
13918 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
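/* For example (illustrative values): with COUNT == 3 and BASEMEM
   addressing [basereg], the MEMs constructed below cover basereg + 0,
   basereg + 4 and basereg + 8, and *OFFSETP is advanced by 12 when
   WRITE_BACK is true.  */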
13920 static rtx
13921 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13922 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13924 rtx mems[MAX_LDM_STM_OPS];
13925 HOST_WIDE_INT offset = *offsetp;
13926 int i;
13928 gcc_assert (count <= MAX_LDM_STM_OPS);
13930 if (GET_CODE (basereg) == PLUS)
13931 basereg = XEXP (basereg, 0);
13933 for (i = 0; i < count; i++)
13935 rtx addr = plus_constant (Pmode, basereg, i * 4);
13936 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13937 offset += 4;
13940 if (write_back)
13941 *offsetp = offset;
13943 if (is_load)
13944 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13945 write_back ? 4 * count : 0);
13946 else
13947 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13948 write_back ? 4 * count : 0);
13951 rtx
13952 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13953 rtx basemem, HOST_WIDE_INT *offsetp)
13955 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13956 offsetp);
13959 rtx
13960 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13961 rtx basemem, HOST_WIDE_INT *offsetp)
13963 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13964 offsetp);
13967 /* Called from a peephole2 expander to turn a sequence of loads into an
13968 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13969 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13970 is true if we can reorder the registers because their subsequent uses
13971 are commutative.
13972 Returns true iff we could generate a new instruction. */
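/* Illustrative example of the ldm_case == 5 path handled below: for
   loads at r4 + 256, r4 + 260 and r4 + 264, the offset is folded away
   by first emitting an add of #256 into the lowest destination
   register (or, on Thumb-1, into the base register itself, which is
   then written back), after which the loads become a plain ldmia.
   The register numbers and offsets are only an example.  */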
13974 bool
13975 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13977 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13978 rtx mems[MAX_LDM_STM_OPS];
13979 int i, j, base_reg;
13980 rtx base_reg_rtx;
13981 HOST_WIDE_INT offset;
13982 int write_back = FALSE;
13983 int ldm_case;
13984 rtx addr;
13986 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13987 &base_reg, &offset, !sort_regs);
13989 if (ldm_case == 0)
13990 return false;
13992 if (sort_regs)
13993 for (i = 0; i < nops - 1; i++)
13994 for (j = i + 1; j < nops; j++)
13995 if (regs[i] > regs[j])
13997 int t = regs[i];
13998 regs[i] = regs[j];
13999 regs[j] = t;
14001 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14003 if (TARGET_THUMB1)
14005 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14006 gcc_assert (ldm_case == 1 || ldm_case == 5);
14007 write_back = TRUE;
14010 if (ldm_case == 5)
14012 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14013 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14014 offset = 0;
14015 if (!TARGET_THUMB1)
14017 base_reg = regs[0];
14018 base_reg_rtx = newbase;
14022 for (i = 0; i < nops; i++)
14024 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14025 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14026 SImode, addr, 0);
14028 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14029 write_back ? offset + i * 4 : 0));
14030 return true;
14033 /* Called from a peephole2 expander to turn a sequence of stores into an
14034 STM instruction. OPERANDS are the operands found by the peephole matcher;
14035 NOPS indicates how many separate stores we are trying to combine.
14036 Returns true iff we could generate a new instruction. */
14038 bool
14039 gen_stm_seq (rtx *operands, int nops)
14041 int i;
14042 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14043 rtx mems[MAX_LDM_STM_OPS];
14044 int base_reg;
14045 rtx base_reg_rtx;
14046 HOST_WIDE_INT offset;
14047 int write_back = FALSE;
14048 int stm_case;
14049 rtx addr;
14050 bool base_reg_dies;
14052 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14053 mem_order, &base_reg, &offset, true);
14055 if (stm_case == 0)
14056 return false;
14058 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14060 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14061 if (TARGET_THUMB1)
14063 gcc_assert (base_reg_dies);
14064 write_back = TRUE;
14067 if (stm_case == 5)
14069 gcc_assert (base_reg_dies);
14070 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14071 offset = 0;
14074 addr = plus_constant (Pmode, base_reg_rtx, offset);
14076 for (i = 0; i < nops; i++)
14078 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14079 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14080 SImode, addr, 0);
14082 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14083 write_back ? offset + i * 4 : 0));
14084 return true;
14087 /* Called from a peephole2 expander to turn a sequence of stores that are
14088 preceded by constant loads into an STM instruction. OPERANDS are the
14089 operands found by the peephole matcher; NOPS indicates how many
14090 separate stores we are trying to combine; there are 2 * NOPS
14091 instructions in the peephole.
14092 Returns true iff we could generate a new instruction. */
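/* Illustrative example of the transformation attempted below: a window
   of the form

       mov r0, #1
       str r0, [r4]
       mov r0, #2
       str r0, [r4, #4]

   reuses r0, so a free register is found for one of the constants, the
   constant loads are re-emitted so that register numbers ascend with
   the memory addresses, and the two stores become a single stm.  The
   register numbers and constants are only an example.  */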
14094 bool
14095 gen_const_stm_seq (rtx *operands, int nops)
14097 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14098 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14099 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14100 rtx mems[MAX_LDM_STM_OPS];
14101 int base_reg;
14102 rtx base_reg_rtx;
14103 HOST_WIDE_INT offset;
14104 int write_back = FALSE;
14105 int stm_case;
14106 rtx addr;
14107 bool base_reg_dies;
14108 int i, j;
14109 HARD_REG_SET allocated;
14111 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14112 mem_order, &base_reg, &offset, false);
14114 if (stm_case == 0)
14115 return false;
14117 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14119 /* If the same register is used more than once, try to find a free
14120 register. */
14121 CLEAR_HARD_REG_SET (allocated);
14122 for (i = 0; i < nops; i++)
14124 for (j = i + 1; j < nops; j++)
14125 if (regs[i] == regs[j])
14127 rtx t = peep2_find_free_register (0, nops * 2,
14128 TARGET_THUMB1 ? "l" : "r",
14129 SImode, &allocated);
14130 if (t == NULL_RTX)
14131 return false;
14132 reg_rtxs[i] = t;
14133 regs[i] = REGNO (t);
14137 /* Compute an ordering that maps the register numbers to an ascending
14138 sequence. */
14139 reg_order[0] = 0;
14140 for (i = 0; i < nops; i++)
14141 if (regs[i] < regs[reg_order[0]])
14142 reg_order[0] = i;
14144 for (i = 1; i < nops; i++)
14146 int this_order = reg_order[i - 1];
14147 for (j = 0; j < nops; j++)
14148 if (regs[j] > regs[reg_order[i - 1]]
14149 && (this_order == reg_order[i - 1]
14150 || regs[j] < regs[this_order]))
14151 this_order = j;
14152 reg_order[i] = this_order;
14155 /* Ensure that registers that must be live after the instruction end
14156 up with the correct value. */
14157 for (i = 0; i < nops; i++)
14159 int this_order = reg_order[i];
14160 if ((this_order != mem_order[i]
14161 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14162 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14163 return false;
14166 /* Load the constants. */
14167 for (i = 0; i < nops; i++)
14169 rtx op = operands[2 * nops + mem_order[i]];
14170 sorted_regs[i] = regs[reg_order[i]];
14171 emit_move_insn (reg_rtxs[reg_order[i]], op);
14174 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14176 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14177 if (TARGET_THUMB1)
14179 gcc_assert (base_reg_dies);
14180 write_back = TRUE;
14183 if (stm_case == 5)
14185 gcc_assert (base_reg_dies);
14186 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14187 offset = 0;
14190 addr = plus_constant (Pmode, base_reg_rtx, offset);
14192 for (i = 0; i < nops; i++)
14194 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14195 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14196 SImode, addr, 0);
14198 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14199 write_back ? offset + i * 4 : 0));
14200 return true;
14203 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14204 unaligned copies on processors which support unaligned semantics for those
14205 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14206 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14207 An interleave factor of 1 (the minimum) will perform no interleaving.
14208 Load/store multiple are used for aligned addresses where possible. */
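/* Worked example (illustrative): copying LENGTH == 23 bytes with
   INTERLEAVE_FACTOR == 2 proceeds as two 8-byte chunks, one remaining
   whole word, one halfword and finally one byte, ending with
   remaining == 0 and srcoffset == dstoffset == 23.  */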
14210 static void
14211 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14212 HOST_WIDE_INT length,
14213 unsigned int interleave_factor)
14215 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14216 int *regnos = XALLOCAVEC (int, interleave_factor);
14217 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14218 HOST_WIDE_INT i, j;
14219 HOST_WIDE_INT remaining = length, words;
14220 rtx halfword_tmp = NULL, byte_tmp = NULL;
14221 rtx dst, src;
14222 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14223 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14224 HOST_WIDE_INT srcoffset, dstoffset;
14225 HOST_WIDE_INT src_autoinc, dst_autoinc;
14226 rtx mem, addr;
14228 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14230 /* Use hard registers if we have aligned source or destination so we can use
14231 load/store multiple with contiguous registers. */
14232 if (dst_aligned || src_aligned)
14233 for (i = 0; i < interleave_factor; i++)
14234 regs[i] = gen_rtx_REG (SImode, i);
14235 else
14236 for (i = 0; i < interleave_factor; i++)
14237 regs[i] = gen_reg_rtx (SImode);
14239 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14240 src = copy_addr_to_reg (XEXP (srcbase, 0));
14242 srcoffset = dstoffset = 0;
14244 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14245 For copying the last bytes we want to subtract this offset again. */
14246 src_autoinc = dst_autoinc = 0;
14248 for (i = 0; i < interleave_factor; i++)
14249 regnos[i] = i;
14251 /* Copy BLOCK_SIZE_BYTES chunks. */
14253 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14255 /* Load words. */
14256 if (src_aligned && interleave_factor > 1)
14258 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14259 TRUE, srcbase, &srcoffset));
14260 src_autoinc += UNITS_PER_WORD * interleave_factor;
14262 else
14264 for (j = 0; j < interleave_factor; j++)
14266 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14267 - src_autoinc));
14268 mem = adjust_automodify_address (srcbase, SImode, addr,
14269 srcoffset + j * UNITS_PER_WORD);
14270 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14272 srcoffset += block_size_bytes;
14275 /* Store words. */
14276 if (dst_aligned && interleave_factor > 1)
14278 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14279 TRUE, dstbase, &dstoffset));
14280 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14282 else
14284 for (j = 0; j < interleave_factor; j++)
14286 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14287 - dst_autoinc));
14288 mem = adjust_automodify_address (dstbase, SImode, addr,
14289 dstoffset + j * UNITS_PER_WORD);
14290 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14292 dstoffset += block_size_bytes;
14295 remaining -= block_size_bytes;
14298 /* Copy any whole words left (note these aren't interleaved with any
14299 subsequent halfword/byte load/stores in the interests of simplicity). */
14301 words = remaining / UNITS_PER_WORD;
14303 gcc_assert (words < interleave_factor);
14305 if (src_aligned && words > 1)
14307 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14308 &srcoffset));
14309 src_autoinc += UNITS_PER_WORD * words;
14311 else
14313 for (j = 0; j < words; j++)
14315 addr = plus_constant (Pmode, src,
14316 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14317 mem = adjust_automodify_address (srcbase, SImode, addr,
14318 srcoffset + j * UNITS_PER_WORD);
14319 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14321 srcoffset += words * UNITS_PER_WORD;
14324 if (dst_aligned && words > 1)
14326 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14327 &dstoffset));
14328 dst_autoinc += words * UNITS_PER_WORD;
14330 else
14332 for (j = 0; j < words; j++)
14334 addr = plus_constant (Pmode, dst,
14335 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14336 mem = adjust_automodify_address (dstbase, SImode, addr,
14337 dstoffset + j * UNITS_PER_WORD);
14338 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14340 dstoffset += words * UNITS_PER_WORD;
14343 remaining -= words * UNITS_PER_WORD;
14345 gcc_assert (remaining < 4);
14347 /* Copy a halfword if necessary. */
14349 if (remaining >= 2)
14351 halfword_tmp = gen_reg_rtx (SImode);
14353 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14354 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14355 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14357 /* Either write out immediately, or delay until we've loaded the last
14358 byte, depending on interleave factor. */
14359 if (interleave_factor == 1)
14361 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14362 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14363 emit_insn (gen_unaligned_storehi (mem,
14364 gen_lowpart (HImode, halfword_tmp)));
14365 halfword_tmp = NULL;
14366 dstoffset += 2;
14369 remaining -= 2;
14370 srcoffset += 2;
14373 gcc_assert (remaining < 2);
14375 /* Copy last byte. */
14377 if ((remaining & 1) != 0)
14379 byte_tmp = gen_reg_rtx (SImode);
14381 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14382 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14383 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14385 if (interleave_factor == 1)
14387 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14388 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14389 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14390 byte_tmp = NULL;
14391 dstoffset++;
14394 remaining--;
14395 srcoffset++;
14398 /* Store last halfword if we haven't done so already. */
14400 if (halfword_tmp)
14402 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14403 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14404 emit_insn (gen_unaligned_storehi (mem,
14405 gen_lowpart (HImode, halfword_tmp)));
14406 dstoffset += 2;
14409 /* Likewise for last byte. */
14411 if (byte_tmp)
14413 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14414 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14415 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14416 dstoffset++;
14419 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14422 /* From mips_adjust_block_mem:
14424 Helper function for doing a loop-based block operation on memory
14425 reference MEM. Each iteration of the loop will operate on LENGTH
14426 bytes of MEM.
14428 Create a new base register for use within the loop and point it to
14429 the start of MEM. Create a new memory reference that uses this
14430 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14432 static void
14433 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14434 rtx *loop_mem)
14436 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14438 /* Although the new mem does not refer to a known location,
14439 it does keep up to LENGTH bytes of alignment. */
14440 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14441 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14444 /* From mips_block_move_loop:
14446 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14447 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14448 the memory regions do not overlap. */
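/* Worked example (illustrative): for LENGTH == 40 and
   BYTES_PER_ITER == 16 the loop below copies 32 bytes in two
   iterations, and the remaining 8 bytes (LEFTOVER) are handled by a
   final straight-line copy.  */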
14450 static void
14451 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14452 unsigned int interleave_factor,
14453 HOST_WIDE_INT bytes_per_iter)
14455 rtx src_reg, dest_reg, final_src, test;
14456 HOST_WIDE_INT leftover;
14458 leftover = length % bytes_per_iter;
14459 length -= leftover;
14461 /* Create registers and memory references for use within the loop. */
14462 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14463 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14465 /* Calculate the value that SRC_REG should have after the last iteration of
14466 the loop. */
14467 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14468 0, 0, OPTAB_WIDEN);
14470 /* Emit the start of the loop. */
14471 rtx_code_label *label = gen_label_rtx ();
14472 emit_label (label);
14474 /* Emit the loop body. */
14475 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14476 interleave_factor);
14478 /* Move on to the next block. */
14479 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14480 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14482 /* Emit the loop condition. */
14483 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14484 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14486 /* Mop up any left-over bytes. */
14487 if (leftover)
14488 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14491 /* Emit a block move when either the source or destination is unaligned (not
14492 aligned to a four-byte boundary). This may need further tuning depending on
14493 core type, optimize_size setting, etc. */
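/* For example (illustrative): when optimizing for size with neither
   buffer word-aligned, a 20-byte copy uses the loop version with an
   interleave factor of 1 and 4 bytes per iteration; when not
   optimizing for size, the same copy is expanded straight-line with an
   interleave factor of 4.  */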
14495 static int
14496 arm_movmemqi_unaligned (rtx *operands)
14498 HOST_WIDE_INT length = INTVAL (operands[2]);
14500 if (optimize_size)
14502 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14503 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14504 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14505 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14506 or dst_aligned though: allow more interleaving in those cases since the
14507 resulting code can be smaller. */
14508 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14509 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14511 if (length > 12)
14512 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14513 interleave_factor, bytes_per_iter);
14514 else
14515 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14516 interleave_factor);
14518 else
14520 /* Note that the loop created by arm_block_move_unaligned_loop may be
14521 subject to loop unrolling, which makes tuning this condition a little
14522 redundant. */
14523 if (length > 32)
14524 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14525 else
14526 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14529 return 1;
14532 int
14533 arm_gen_movmemqi (rtx *operands)
14535 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14536 HOST_WIDE_INT srcoffset, dstoffset;
14537 int i;
14538 rtx src, dst, srcbase, dstbase;
14539 rtx part_bytes_reg = NULL;
14540 rtx mem;
14542 if (!CONST_INT_P (operands[2])
14543 || !CONST_INT_P (operands[3])
14544 || INTVAL (operands[2]) > 64)
14545 return 0;
14547 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14548 return arm_movmemqi_unaligned (operands);
14550 if (INTVAL (operands[3]) & 3)
14551 return 0;
14553 dstbase = operands[0];
14554 srcbase = operands[1];
14556 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14557 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14559 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14560 out_words_to_go = INTVAL (operands[2]) / 4;
14561 last_bytes = INTVAL (operands[2]) & 3;
14562 dstoffset = srcoffset = 0;
14564 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14565 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14567 for (i = 0; in_words_to_go >= 2; i+=4)
14569 if (in_words_to_go > 4)
14570 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14571 TRUE, srcbase, &srcoffset));
14572 else
14573 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14574 src, FALSE, srcbase,
14575 &srcoffset));
14577 if (out_words_to_go)
14579 if (out_words_to_go > 4)
14580 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14581 TRUE, dstbase, &dstoffset));
14582 else if (out_words_to_go != 1)
14583 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14584 out_words_to_go, dst,
14585 (last_bytes == 0
14586 ? FALSE : TRUE),
14587 dstbase, &dstoffset));
14588 else
14590 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14591 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14592 if (last_bytes != 0)
14594 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14595 dstoffset += 4;
14600 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14601 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14604 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14605 if (out_words_to_go)
14607 rtx sreg;
14609 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14610 sreg = copy_to_reg (mem);
14612 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14613 emit_move_insn (mem, sreg);
14614 in_words_to_go--;
14616 gcc_assert (!in_words_to_go); /* Sanity check */
14619 if (in_words_to_go)
14621 gcc_assert (in_words_to_go > 0);
14623 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14624 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14627 gcc_assert (!last_bytes || part_bytes_reg);
14629 if (BYTES_BIG_ENDIAN && last_bytes)
14631 rtx tmp = gen_reg_rtx (SImode);
14633 /* The bytes we want are in the top end of the word. */
14634 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14635 GEN_INT (8 * (4 - last_bytes))));
14636 part_bytes_reg = tmp;
14638 while (last_bytes)
14640 mem = adjust_automodify_address (dstbase, QImode,
14641 plus_constant (Pmode, dst,
14642 last_bytes - 1),
14643 dstoffset + last_bytes - 1);
14644 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14646 if (--last_bytes)
14648 tmp = gen_reg_rtx (SImode);
14649 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14650 part_bytes_reg = tmp;
14655 else
14657 if (last_bytes > 1)
14659 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14660 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14661 last_bytes -= 2;
14662 if (last_bytes)
14664 rtx tmp = gen_reg_rtx (SImode);
14665 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14666 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14667 part_bytes_reg = tmp;
14668 dstoffset += 2;
14672 if (last_bytes)
14674 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14675 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14679 return 1;
14682 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14683 by mode size. */
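/* For example, given an SImode MEM at [r0, #8], this returns an SImode
   MEM at [r0, #12]: the increment is the size of the MEM's mode.  */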
14684 inline static rtx
14685 next_consecutive_mem (rtx mem)
14687 machine_mode mode = GET_MODE (mem);
14688 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14689 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14691 return adjust_automodify_address (mem, mode, addr, offset);
14694 /* Copy using LDRD/STRD instructions whenever possible.
14695 Returns true upon success. */
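/* Illustrative example: a 14-byte copy with both buffers word-aligned
   is expanded below as one doubleword move (typically LDRD/STRD), one
   word move and one halfword move; when one side is not word-aligned
   but unaligned access is permitted, the unaligned_loaddi/storedi
   patterns are used for that side instead.  */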
14696 bool
14697 gen_movmem_ldrd_strd (rtx *operands)
14699 unsigned HOST_WIDE_INT len;
14700 HOST_WIDE_INT align;
14701 rtx src, dst, base;
14702 rtx reg0;
14703 bool src_aligned, dst_aligned;
14704 bool src_volatile, dst_volatile;
14706 gcc_assert (CONST_INT_P (operands[2]));
14707 gcc_assert (CONST_INT_P (operands[3]));
14709 len = UINTVAL (operands[2]);
14710 if (len > 64)
14711 return false;
14713 /* Maximum alignment we can assume for both src and dst buffers. */
14714 align = INTVAL (operands[3]);
14716 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14717 return false;
14719 /* Place src and dst addresses in registers
14720 and update the corresponding mem rtx. */
14721 dst = operands[0];
14722 dst_volatile = MEM_VOLATILE_P (dst);
14723 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14724 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14725 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14727 src = operands[1];
14728 src_volatile = MEM_VOLATILE_P (src);
14729 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14730 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14731 src = adjust_automodify_address (src, VOIDmode, base, 0);
14733 if (!unaligned_access && !(src_aligned && dst_aligned))
14734 return false;
14736 if (src_volatile || dst_volatile)
14737 return false;
14739 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14740 if (!(dst_aligned || src_aligned))
14741 return arm_gen_movmemqi (operands);
14743 src = adjust_address (src, DImode, 0);
14744 dst = adjust_address (dst, DImode, 0);
14745 while (len >= 8)
14747 len -= 8;
14748 reg0 = gen_reg_rtx (DImode);
14749 if (src_aligned)
14750 emit_move_insn (reg0, src);
14751 else
14752 emit_insn (gen_unaligned_loaddi (reg0, src));
14754 if (dst_aligned)
14755 emit_move_insn (dst, reg0);
14756 else
14757 emit_insn (gen_unaligned_storedi (dst, reg0));
14759 src = next_consecutive_mem (src);
14760 dst = next_consecutive_mem (dst);
14763 gcc_assert (len < 8);
14764 if (len >= 4)
14766 /* More than a word but less than a double-word to copy. Copy a word. */
14767 reg0 = gen_reg_rtx (SImode);
14768 src = adjust_address (src, SImode, 0);
14769 dst = adjust_address (dst, SImode, 0);
14770 if (src_aligned)
14771 emit_move_insn (reg0, src);
14772 else
14773 emit_insn (gen_unaligned_loadsi (reg0, src));
14775 if (dst_aligned)
14776 emit_move_insn (dst, reg0);
14777 else
14778 emit_insn (gen_unaligned_storesi (dst, reg0));
14780 src = next_consecutive_mem (src);
14781 dst = next_consecutive_mem (dst);
14782 len -= 4;
14785 if (len == 0)
14786 return true;
14788 /* Copy the remaining bytes. */
14789 if (len >= 2)
14791 dst = adjust_address (dst, HImode, 0);
14792 src = adjust_address (src, HImode, 0);
14793 reg0 = gen_reg_rtx (SImode);
14794 if (src_aligned)
14795 emit_insn (gen_zero_extendhisi2 (reg0, src));
14796 else
14797 emit_insn (gen_unaligned_loadhiu (reg0, src));
14799 if (dst_aligned)
14800 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14801 else
14802 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14804 src = next_consecutive_mem (src);
14805 dst = next_consecutive_mem (dst);
14806 if (len == 2)
14807 return true;
14810 dst = adjust_address (dst, QImode, 0);
14811 src = adjust_address (src, QImode, 0);
14812 reg0 = gen_reg_rtx (QImode);
14813 emit_move_insn (reg0, src);
14814 emit_move_insn (dst, reg0);
14815 return true;
14818 /* Select a dominance comparison mode if possible for a test of the general
14819 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14820 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14821 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14822 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14823 In all cases OP will be either EQ or NE, but we don't need to know which
14824 here. If we are unable to support a dominance comparison we return
14825 CC mode. This will then fail to match for the RTL expressions that
14826 generate this call. */
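/* For example (illustrative): with COND_OR == DOM_CC_X_OR_Y,
   X = (a < b) and Y = (a <= b), LT implies LE and the function returns
   CC_DLEmode; for two unrelated conditions (say LT and GEU) it returns
   CCmode and the enclosing pattern fails to match.  */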
14827 machine_mode
14828 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14830 enum rtx_code cond1, cond2;
14831 int swapped = 0;
14833 /* Currently we will probably get the wrong result if the individual
14834 comparisons are not simple. This also ensures that it is safe to
14835 reverse a comparison if necessary. */
14836 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14837 != CCmode)
14838 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14839 != CCmode))
14840 return CCmode;
14842 /* The if_then_else variant of this tests the second condition if the
14843 first passes, but is true if the first fails. Reverse the first
14844 condition to get a true "inclusive-or" expression. */
14845 if (cond_or == DOM_CC_NX_OR_Y)
14846 cond1 = reverse_condition (cond1);
14848 /* If the comparisons are not equal, and one doesn't dominate the other,
14849 then we can't do this. */
14850 if (cond1 != cond2
14851 && !comparison_dominates_p (cond1, cond2)
14852 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14853 return CCmode;
14855 if (swapped)
14856 std::swap (cond1, cond2);
14858 switch (cond1)
14860 case EQ:
14861 if (cond_or == DOM_CC_X_AND_Y)
14862 return CC_DEQmode;
14864 switch (cond2)
14866 case EQ: return CC_DEQmode;
14867 case LE: return CC_DLEmode;
14868 case LEU: return CC_DLEUmode;
14869 case GE: return CC_DGEmode;
14870 case GEU: return CC_DGEUmode;
14871 default: gcc_unreachable ();
14874 case LT:
14875 if (cond_or == DOM_CC_X_AND_Y)
14876 return CC_DLTmode;
14878 switch (cond2)
14880 case LT:
14881 return CC_DLTmode;
14882 case LE:
14883 return CC_DLEmode;
14884 case NE:
14885 return CC_DNEmode;
14886 default:
14887 gcc_unreachable ();
14890 case GT:
14891 if (cond_or == DOM_CC_X_AND_Y)
14892 return CC_DGTmode;
14894 switch (cond2)
14896 case GT:
14897 return CC_DGTmode;
14898 case GE:
14899 return CC_DGEmode;
14900 case NE:
14901 return CC_DNEmode;
14902 default:
14903 gcc_unreachable ();
14906 case LTU:
14907 if (cond_or == DOM_CC_X_AND_Y)
14908 return CC_DLTUmode;
14910 switch (cond2)
14912 case LTU:
14913 return CC_DLTUmode;
14914 case LEU:
14915 return CC_DLEUmode;
14916 case NE:
14917 return CC_DNEmode;
14918 default:
14919 gcc_unreachable ();
14922 case GTU:
14923 if (cond_or == DOM_CC_X_AND_Y)
14924 return CC_DGTUmode;
14926 switch (cond2)
14928 case GTU:
14929 return CC_DGTUmode;
14930 case GEU:
14931 return CC_DGEUmode;
14932 case NE:
14933 return CC_DNEmode;
14934 default:
14935 gcc_unreachable ();
14938 /* The remaining cases only occur when both comparisons are the
14939 same. */
14940 case NE:
14941 gcc_assert (cond1 == cond2);
14942 return CC_DNEmode;
14944 case LE:
14945 gcc_assert (cond1 == cond2);
14946 return CC_DLEmode;
14948 case GE:
14949 gcc_assert (cond1 == cond2);
14950 return CC_DGEmode;
14952 case LEU:
14953 gcc_assert (cond1 == cond2);
14954 return CC_DLEUmode;
14956 case GEU:
14957 gcc_assert (cond1 == cond2);
14958 return CC_DGEUmode;
14960 default:
14961 gcc_unreachable ();
14965 machine_mode
14966 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14968 /* All floating point compares return CCFP if it is an equality
14969 comparison, and CCFPE otherwise. */
14970 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14972 switch (op)
14974 case EQ:
14975 case NE:
14976 case UNORDERED:
14977 case ORDERED:
14978 case UNLT:
14979 case UNLE:
14980 case UNGT:
14981 case UNGE:
14982 case UNEQ:
14983 case LTGT:
14984 return CCFPmode;
14986 case LT:
14987 case LE:
14988 case GT:
14989 case GE:
14990 return CCFPEmode;
14992 default:
14993 gcc_unreachable ();
14997 /* A compare with a shifted operand. Because of canonicalization, the
14998 comparison will have to be swapped when we emit the assembler. */
14999 if (GET_MODE (y) == SImode
15000 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15001 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15002 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15003 || GET_CODE (x) == ROTATERT))
15004 return CC_SWPmode;
15006 /* This operation is performed swapped, but since we only rely on the Z
15007 flag we don't need an additional mode. */
15008 if (GET_MODE (y) == SImode
15009 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15010 && GET_CODE (x) == NEG
15011 && (op == EQ || op == NE))
15012 return CC_Zmode;
15014 /* This is a special case that is used by combine to allow a
15015 comparison of a shifted byte load to be split into a zero-extend
15016 followed by a comparison of the shifted integer (only valid for
15017 equalities and unsigned inequalities). */
15018 if (GET_MODE (x) == SImode
15019 && GET_CODE (x) == ASHIFT
15020 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15021 && GET_CODE (XEXP (x, 0)) == SUBREG
15022 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15023 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15024 && (op == EQ || op == NE
15025 || op == GEU || op == GTU || op == LTU || op == LEU)
15026 && CONST_INT_P (y))
15027 return CC_Zmode;
15029 /* A construct for a conditional compare, if the false arm contains
15030 0, then both conditions must be true, otherwise either condition
15031 must be true. Not all conditions are possible, so CCmode is
15032 returned if it can't be done. */
15033 if (GET_CODE (x) == IF_THEN_ELSE
15034 && (XEXP (x, 2) == const0_rtx
15035 || XEXP (x, 2) == const1_rtx)
15036 && COMPARISON_P (XEXP (x, 0))
15037 && COMPARISON_P (XEXP (x, 1)))
15038 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15039 INTVAL (XEXP (x, 2)));
15041 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15042 if (GET_CODE (x) == AND
15043 && (op == EQ || op == NE)
15044 && COMPARISON_P (XEXP (x, 0))
15045 && COMPARISON_P (XEXP (x, 1)))
15046 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15047 DOM_CC_X_AND_Y);
15049 if (GET_CODE (x) == IOR
15050 && (op == EQ || op == NE)
15051 && COMPARISON_P (XEXP (x, 0))
15052 && COMPARISON_P (XEXP (x, 1)))
15053 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15054 DOM_CC_X_OR_Y);
15056 /* An operation (on Thumb) where we want to test for a single bit.
15057 This is done by shifting that bit up into the top bit of a
15058 scratch register; we can then branch on the sign bit. */
15059 if (TARGET_THUMB1
15060 && GET_MODE (x) == SImode
15061 && (op == EQ || op == NE)
15062 && GET_CODE (x) == ZERO_EXTRACT
15063 && XEXP (x, 1) == const1_rtx)
15064 return CC_Nmode;
15066 /* An operation that sets the condition codes as a side-effect, the
15067 V flag is not set correctly, so we can only use comparisons where
15068 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15069 instead.) */
15070 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15071 if (GET_MODE (x) == SImode
15072 && y == const0_rtx
15073 && (op == EQ || op == NE || op == LT || op == GE)
15074 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15075 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15076 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15077 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15078 || GET_CODE (x) == LSHIFTRT
15079 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15080 || GET_CODE (x) == ROTATERT
15081 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15082 return CC_NOOVmode;
15084 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15085 return CC_Zmode;
15087 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15088 && GET_CODE (x) == PLUS
15089 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15090 return CC_Cmode;
15092 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15094 switch (op)
15096 case EQ:
15097 case NE:
15098 /* A DImode comparison against zero can be implemented by
15099 or'ing the two halves together. */
15100 if (y == const0_rtx)
15101 return CC_Zmode;
15103 /* We can do an equality test in three Thumb instructions. */
15104 if (!TARGET_32BIT)
15105 return CC_Zmode;
15107 /* FALLTHROUGH */
15109 case LTU:
15110 case LEU:
15111 case GTU:
15112 case GEU:
15113 /* DImode unsigned comparisons can be implemented by cmp +
15114 cmpeq without a scratch register. Not worth doing in
15115 Thumb-2. */
15116 if (TARGET_32BIT)
15117 return CC_CZmode;
15119 /* FALLTHROUGH */
15121 case LT:
15122 case LE:
15123 case GT:
15124 case GE:
15125 /* DImode signed and unsigned comparisons can be implemented
15126 by cmp + sbcs with a scratch register, but that does not
15127 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15128 gcc_assert (op != EQ && op != NE);
15129 return CC_NCVmode;
15131 default:
15132 gcc_unreachable ();
15136 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15137 return GET_MODE (x);
15139 return CCmode;
15142 /* X and Y are two things to compare using CODE. Emit the compare insn and
15143 return the rtx for register 0 in the proper mode.  SCRATCH, if nonnull,
15144 may be used as a scratch register for DImode comparisons (see below).  */
15145 rtx
15146 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15148 machine_mode mode;
15149 rtx cc_reg;
15150 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15152 /* We might have X as a constant, Y as a register because of the predicates
15153 used for cmpdi. If so, force X to a register here. */
15154 if (dimode_comparison && !REG_P (x))
15155 x = force_reg (DImode, x);
15157 mode = SELECT_CC_MODE (code, x, y);
15158 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15160 if (dimode_comparison
15161 && mode != CC_CZmode)
15163 rtx clobber, set;
15165 /* To compare two non-zero values for equality, XOR them and
15166 then compare against zero. Not used for ARM mode; there
15167 CC_CZmode is cheaper. */
15168 if (mode == CC_Zmode && y != const0_rtx)
15170 gcc_assert (!reload_completed);
15171 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15172 y = const0_rtx;
15175 /* A scratch register is required. */
15176 if (reload_completed)
15177 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15178 else
15179 scratch = gen_rtx_SCRATCH (SImode);
15181 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15182 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15183 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15185 else
15186 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15188 return cc_reg;
15191 /* Generate a sequence of insns that will generate the correct return
15192 address mask depending on the physical architecture that the program
15193 is running on. */
15194 rtx
15195 arm_gen_return_addr_mask (void)
15197 rtx reg = gen_reg_rtx (Pmode);
15199 emit_insn (gen_return_addr_mask (reg));
15200 return reg;
15203 void
15204 arm_reload_in_hi (rtx *operands)
15206 rtx ref = operands[1];
15207 rtx base, scratch;
15208 HOST_WIDE_INT offset = 0;
15210 if (GET_CODE (ref) == SUBREG)
15212 offset = SUBREG_BYTE (ref);
15213 ref = SUBREG_REG (ref);
15216 if (REG_P (ref))
15218 /* We have a pseudo which has been spilt onto the stack; there
15219 are two cases here: the first where there is a simple
15220 stack-slot replacement and a second where the stack-slot is
15221 out of range, or is used as a subreg. */
15222 if (reg_equiv_mem (REGNO (ref)))
15224 ref = reg_equiv_mem (REGNO (ref));
15225 base = find_replacement (&XEXP (ref, 0));
15227 else
15228 /* The slot is out of range, or was dressed up in a SUBREG. */
15229 base = reg_equiv_address (REGNO (ref));
15231 else
15232 base = find_replacement (&XEXP (ref, 0));
15234 /* Handle the case where the address is too complex to be offset by 1. */
15235 if (GET_CODE (base) == MINUS
15236 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15238 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15240 emit_set_insn (base_plus, base);
15241 base = base_plus;
15243 else if (GET_CODE (base) == PLUS)
15245 /* The addend must be CONST_INT, or we would have dealt with it above. */
15246 HOST_WIDE_INT hi, lo;
15248 offset += INTVAL (XEXP (base, 1));
15249 base = XEXP (base, 0);
15251 /* Rework the address into a legal sequence of insns. */
15252 /* Valid range for lo is -4095 -> 4095 */
15253 lo = (offset >= 0
15254 ? (offset & 0xfff)
15255 : -((-offset) & 0xfff));
15257 /* Corner case, if lo is the max offset then we would be out of range
15258 once we have added the additional 1 below, so bump the msb into the
15259 pre-loading insn(s). */
15260 if (lo == 4095)
15261 lo &= 0x7ff;
15263 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15264 ^ (HOST_WIDE_INT) 0x80000000)
15265 - (HOST_WIDE_INT) 0x80000000);
15267 gcc_assert (hi + lo == offset);
15269 if (hi != 0)
15271 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15273 /* Get the base address; addsi3 knows how to handle constants
15274 that require more than one insn. */
15275 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15276 base = base_plus;
15277 offset = lo;
15281 /* Operands[2] may overlap operands[0] (though it won't overlap
15282 operands[1]), that's why we asked for a DImode reg -- so we can
15283 use the half that does not overlap.  */
15284 if (REGNO (operands[2]) == REGNO (operands[0]))
15285 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15286 else
15287 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15289 emit_insn (gen_zero_extendqisi2 (scratch,
15290 gen_rtx_MEM (QImode,
15291 plus_constant (Pmode, base,
15292 offset))));
15293 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15294 gen_rtx_MEM (QImode,
15295 plus_constant (Pmode, base,
15296 offset + 1))));
15297 if (!BYTES_BIG_ENDIAN)
15298 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15299 gen_rtx_IOR (SImode,
15300 gen_rtx_ASHIFT
15301 (SImode,
15302 gen_rtx_SUBREG (SImode, operands[0], 0),
15303 GEN_INT (8)),
15304 scratch));
15305 else
15306 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15307 gen_rtx_IOR (SImode,
15308 gen_rtx_ASHIFT (SImode, scratch,
15309 GEN_INT (8)),
15310 gen_rtx_SUBREG (SImode, operands[0], 0)));
15313 /* Handle storing a half-word to memory during reload by synthesizing as two
15314 byte stores. Take care not to clobber the input values until after we
15315 have moved them somewhere safe. This code assumes that if the DImode
15316 scratch in operands[2] overlaps either the input value or output address
15317 in some way, then that value must die in this insn (we absolutely need
15318 two scratch registers for some corner cases). */
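/* Illustrative example of the splitting performed below, for a
   little-endian target: storing the halfword in r0 to [r4, #10]
   becomes roughly

       strb r0, [r4, #10]
       lsr  scratch, r0, #8
       strb scratch, [r4, #11]

   where the scratch comes from the DImode register in operands[2].
   The register numbers and offset are only an example.  */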
15319 void
15320 arm_reload_out_hi (rtx *operands)
15322 rtx ref = operands[0];
15323 rtx outval = operands[1];
15324 rtx base, scratch;
15325 HOST_WIDE_INT offset = 0;
15327 if (GET_CODE (ref) == SUBREG)
15329 offset = SUBREG_BYTE (ref);
15330 ref = SUBREG_REG (ref);
15333 if (REG_P (ref))
15335 /* We have a pseudo which has been spilt onto the stack; there
15336 are two cases here: the first where there is a simple
15337 stack-slot replacement and a second where the stack-slot is
15338 out of range, or is used as a subreg. */
15339 if (reg_equiv_mem (REGNO (ref)))
15341 ref = reg_equiv_mem (REGNO (ref));
15342 base = find_replacement (&XEXP (ref, 0));
15344 else
15345 /* The slot is out of range, or was dressed up in a SUBREG. */
15346 base = reg_equiv_address (REGNO (ref));
15348 else
15349 base = find_replacement (&XEXP (ref, 0));
15351 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15353 /* Handle the case where the address is too complex to be offset by 1. */
15354 if (GET_CODE (base) == MINUS
15355 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15357 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15359 /* Be careful not to destroy OUTVAL. */
15360 if (reg_overlap_mentioned_p (base_plus, outval))
15362 /* Updating base_plus might destroy outval, see if we can
15363 swap the scratch and base_plus. */
15364 if (!reg_overlap_mentioned_p (scratch, outval))
15365 std::swap (scratch, base_plus);
15366 else
15368 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15370 /* Be conservative and copy OUTVAL into the scratch now,
15371 this should only be necessary if outval is a subreg
15372 of something larger than a word. */
15373 /* XXX Might this clobber base? I can't see how it can,
15374 since scratch is known to overlap with OUTVAL, and
15375 must be wider than a word. */
15376 emit_insn (gen_movhi (scratch_hi, outval));
15377 outval = scratch_hi;
15381 emit_set_insn (base_plus, base);
15382 base = base_plus;
15384 else if (GET_CODE (base) == PLUS)
15386 /* The addend must be CONST_INT, or we would have dealt with it above. */
15387 HOST_WIDE_INT hi, lo;
15389 offset += INTVAL (XEXP (base, 1));
15390 base = XEXP (base, 0);
15392 /* Rework the address into a legal sequence of insns. */
15393 /* Valid range for lo is -4095 -> 4095 */
15394 lo = (offset >= 0
15395 ? (offset & 0xfff)
15396 : -((-offset) & 0xfff));
15398 /* Corner case, if lo is the max offset then we would be out of range
15399 once we have added the additional 1 below, so bump the msb into the
15400 pre-loading insn(s). */
15401 if (lo == 4095)
15402 lo &= 0x7ff;
15404 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15405 ^ (HOST_WIDE_INT) 0x80000000)
15406 - (HOST_WIDE_INT) 0x80000000);
15408 gcc_assert (hi + lo == offset);
15410 if (hi != 0)
15412 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15414 /* Be careful not to destroy OUTVAL. */
15415 if (reg_overlap_mentioned_p (base_plus, outval))
15417 /* Updating base_plus might destroy outval, see if we
15418 can swap the scratch and base_plus. */
15419 if (!reg_overlap_mentioned_p (scratch, outval))
15420 std::swap (scratch, base_plus);
15421 else
15423 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15425 /* Be conservative and copy outval into scratch now,
15426 this should only be necessary if outval is a
15427 subreg of something larger than a word. */
15428 /* XXX Might this clobber base? I can't see how it
15429 can, since scratch is known to overlap with
15430 outval. */
15431 emit_insn (gen_movhi (scratch_hi, outval));
15432 outval = scratch_hi;
15436 /* Get the base address; addsi3 knows how to handle constants
15437 that require more than one insn. */
15438 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15439 base = base_plus;
15440 offset = lo;
15444 if (BYTES_BIG_ENDIAN)
15446 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15447 plus_constant (Pmode, base,
15448 offset + 1)),
15449 gen_lowpart (QImode, outval)));
15450 emit_insn (gen_lshrsi3 (scratch,
15451 gen_rtx_SUBREG (SImode, outval, 0),
15452 GEN_INT (8)));
15453 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15454 offset)),
15455 gen_lowpart (QImode, scratch)));
15457 else
15459 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15460 offset)),
15461 gen_lowpart (QImode, outval)));
15462 emit_insn (gen_lshrsi3 (scratch,
15463 gen_rtx_SUBREG (SImode, outval, 0),
15464 GEN_INT (8)));
15465 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15466 plus_constant (Pmode, base,
15467 offset + 1)),
15468 gen_lowpart (QImode, scratch)));
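/* Illustrative sketch (not part of the original sources): the hi/lo
   decomposition used above splits an out-of-range constant offset so that
   LO stays within the +/-4095 range of a byte load/store (with headroom
   for the "+ 1" used to address the second byte) and HI is the remainder
   that is added to the base register first.  The helper name below is
   invented purely for illustration.  */
static HOST_WIDE_INT ATTRIBUTE_UNUSED
example_split_hi_lo (HOST_WIDE_INT offset, HOST_WIDE_INT *lo_out)
{
  HOST_WIDE_INT lo = (offset >= 0
                      ? (offset & 0xfff)
                      : -((-offset) & 0xfff));

  /* If LO is already the maximum offset, the later "offset + 1" would go
     out of range, so move one more bit into HI.  */
  if (lo == 4095)
    lo &= 0x7ff;

  /* HI is whatever remains, sign-extended from 32 bits; e.g. an offset of
     0x1234 splits into HI = 0x1000 and LO = 0x234.  */
  HOST_WIDE_INT hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
                       ^ (HOST_WIDE_INT) 0x80000000)
                      - (HOST_WIDE_INT) 0x80000000);
  gcc_assert (hi + lo == offset);

  *lo_out = lo;
  return hi;
}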
15472 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15473 (padded to the size of a word) should be passed in a register. */
15475 static bool
15476 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15478 if (TARGET_AAPCS_BASED)
15479 return must_pass_in_stack_var_size (mode, type);
15480 else
15481 return must_pass_in_stack_var_size_or_pad (mode, type);
15485 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15486 Return true if an argument passed on the stack should be padded upwards,
15487 i.e. if the least-significant byte has useful data.
15488 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15489 aggregate types are placed at the lowest memory address. */
15491 bool
15492 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15494 if (!TARGET_AAPCS_BASED)
15495 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15497 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15498 return false;
15500 return true;
15504 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15505 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15506 register has useful data, and return the opposite if the most
15507 significant byte does. */
15509 bool
15510 arm_pad_reg_upward (machine_mode mode,
15511 tree type, int first ATTRIBUTE_UNUSED)
15513 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15515 /* For AAPCS, small aggregates, small fixed-point types,
15516 and small complex types are always padded upwards. */
15517 if (type)
15519 if ((AGGREGATE_TYPE_P (type)
15520 || TREE_CODE (type) == COMPLEX_TYPE
15521 || FIXED_POINT_TYPE_P (type))
15522 && int_size_in_bytes (type) <= 4)
15523 return true;
15525 else
15527 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15528 && GET_MODE_SIZE (mode) <= 4)
15529 return true;
15533 /* Otherwise, use default padding. */
15534 return !BYTES_BIG_ENDIAN;
15537 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15538 assuming that the address in the base register is word aligned. */
15539 bool
15540 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15542 HOST_WIDE_INT max_offset;
15544 /* Offset must be a multiple of 4 in Thumb mode. */
15545 if (TARGET_THUMB2 && ((offset & 3) != 0))
15546 return false;
15548 if (TARGET_THUMB2)
15549 max_offset = 1020;
15550 else if (TARGET_ARM)
15551 max_offset = 255;
15552 else
15553 return false;
15555 return ((offset <= max_offset) && (offset >= -max_offset));
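/* Illustrative sketch (not part of the original sources): what the
   predicate above accepts.  In Thumb-2 the offset must be a multiple of 4
   and no larger than 1020 in magnitude; in ARM state the limit is +/-255
   with no alignment requirement.  The helper is invented only to spell
   out a few cases.  */
static void ATTRIBUTE_UNUSED
example_ldrd_offset_checks (void)
{
  if (TARGET_THUMB2)
    {
      gcc_assert (offset_ok_for_ldrd_strd (1020));
      gcc_assert (!offset_ok_for_ldrd_strd (1022));  /* not a multiple of 4 */
      gcc_assert (!offset_ok_for_ldrd_strd (1024));  /* out of range */
    }
  else if (TARGET_ARM)
    {
      gcc_assert (offset_ok_for_ldrd_strd (-255));
      gcc_assert (!offset_ok_for_ldrd_strd (256));
    }
}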
15558 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15559 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15560 Assumes that the address in the base register RN is word aligned. Pattern
15561 guarantees that both memory accesses use the same base register,
15562 the offsets are constants within the range, and the gap between the offsets is 4.
15563 If reload is complete then check that the registers are legal. WBACK indicates
15564 whether the address is updated. LOAD indicates whether the memory access is a load or a store. */
15565 bool
15566 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15567 bool wback, bool load)
15569 unsigned int t, t2, n;
15571 if (!reload_completed)
15572 return true;
15574 if (!offset_ok_for_ldrd_strd (offset))
15575 return false;
15577 t = REGNO (rt);
15578 t2 = REGNO (rt2);
15579 n = REGNO (rn);
15581 if ((TARGET_THUMB2)
15582 && ((wback && (n == t || n == t2))
15583 || (t == SP_REGNUM)
15584 || (t == PC_REGNUM)
15585 || (t2 == SP_REGNUM)
15586 || (t2 == PC_REGNUM)
15587 || (!load && (n == PC_REGNUM))
15588 || (load && (t == t2))
15589 /* Triggers Cortex-M3 LDRD errata. */
15590 || (!wback && load && fix_cm3_ldrd && (n == t))))
15591 return false;
15593 if ((TARGET_ARM)
15594 && ((wback && (n == t || n == t2))
15595 || (t2 == PC_REGNUM)
15596 || (t % 2 != 0) /* First destination register is not even. */
15597 || (t2 != t + 1)
15598 /* PC can be used as base register (for offset addressing only),
15599 but it is deprecated. */
15600 || (n == PC_REGNUM)))
15601 return false;
15603 return true;
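/* Illustrative sketch (not part of the original sources): after reload,
   the ARM-state rules above require the destination registers to form an
   even/odd pair such as r4:r5; an odd first register or a non-consecutive
   pair is rejected.  The helper only restates the checks in
   operands_ok_ldrd_strd and its name is invented for this example.  */
static void ATTRIBUTE_UNUSED
example_arm_ldrd_register_rules (void)
{
  if (TARGET_ARM && reload_completed)
    {
      rtx r4 = gen_rtx_REG (SImode, 4);
      rtx r5 = gen_rtx_REG (SImode, 5);
      rtx r6 = gen_rtx_REG (SImode, 6);
      rtx r7 = gen_rtx_REG (SImode, 7);

      /* r4:r5 with base r7 and offset 0 is a legal even/odd pair...  */
      gcc_assert (operands_ok_ldrd_strd (r4, r5, r7, 0, false, true));
      /* ...but an odd first register or a non-consecutive pair is not.  */
      gcc_assert (!operands_ok_ldrd_strd (r5, r6, r7, 0, false, true));
      gcc_assert (!operands_ok_ldrd_strd (r4, r6, r7, 0, false, true));
    }
}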
15606 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15607 operand MEM's address contains an immediate offset from the base
15608 register and has no side effects, in which case it sets BASE and
15609 OFFSET accordingly. */
15610 static bool
15611 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15613 rtx addr;
15615 gcc_assert (base != NULL && offset != NULL);
15617 /* TODO: Handle more general memory operand patterns, such as
15618 PRE_DEC and PRE_INC. */
15620 if (side_effects_p (mem))
15621 return false;
15623 /* Can't deal with subregs. */
15624 if (GET_CODE (mem) == SUBREG)
15625 return false;
15627 gcc_assert (MEM_P (mem));
15629 *offset = const0_rtx;
15631 addr = XEXP (mem, 0);
15633 /* If addr isn't valid for DImode, then we can't handle it. */
15634 if (!arm_legitimate_address_p (DImode, addr,
15635 reload_in_progress || reload_completed))
15636 return false;
15638 if (REG_P (addr))
15640 *base = addr;
15641 return true;
15643 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15645 *base = XEXP (addr, 0);
15646 *offset = XEXP (addr, 1);
15647 return (REG_P (*base) && CONST_INT_P (*offset));
15650 return false;
15653 /* Called from a peephole2 to replace two word-size accesses with a
15654 single LDRD/STRD instruction. Returns true iff we can generate a
15655 new instruction sequence. That is, both accesses use the same base
15656 register and the gap between constant offsets is 4. This function
15657 may reorder its operands to match ldrd/strd RTL templates.
15658 OPERANDS are the operands found by the peephole matcher;
15659 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15660 corresponding memory operands. LOAD indicates whether the access
15661 is a load or a store. CONST_STORE indicates a store of constant
15662 integer values held in OPERANDS[4,5] and assumes that the pattern
15663 is 4 insns long, for the purpose of checking dead registers.
15664 COMMUTE indicates that register operands may be reordered. */
15665 bool
15666 gen_operands_ldrd_strd (rtx *operands, bool load,
15667 bool const_store, bool commute)
15669 int nops = 2;
15670 HOST_WIDE_INT offsets[2], offset;
15671 rtx base = NULL_RTX;
15672 rtx cur_base, cur_offset, tmp;
15673 int i, gap;
15674 HARD_REG_SET regset;
15676 gcc_assert (!const_store || !load);
15677 /* Check that the memory references are immediate offsets from the
15678 same base register. Extract the base register, the destination
15679 registers, and the corresponding memory offsets. */
15680 for (i = 0; i < nops; i++)
15682 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15683 return false;
15685 if (i == 0)
15686 base = cur_base;
15687 else if (REGNO (base) != REGNO (cur_base))
15688 return false;
15690 offsets[i] = INTVAL (cur_offset);
15691 if (GET_CODE (operands[i]) == SUBREG)
15693 tmp = SUBREG_REG (operands[i]);
15694 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15695 operands[i] = tmp;
15699 /* Make sure there is no dependency between the individual loads. */
15700 if (load && REGNO (operands[0]) == REGNO (base))
15701 return false; /* RAW */
15703 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15704 return false; /* WAW */
15706 /* If the same input register is used in both stores
15707 when storing different constants, try to find a free register.
15708 For example, the code
15709 mov r0, 0
15710 str r0, [r2]
15711 mov r0, 1
15712 str r0, [r2, #4]
15713 can be transformed into
15714 mov r1, 0
15715 strd r1, r0, [r2]
15716 in Thumb mode assuming that r1 is free. */
15717 if (const_store
15718 && REGNO (operands[0]) == REGNO (operands[1])
15719 && INTVAL (operands[4]) != INTVAL (operands[5]))
15721 if (TARGET_THUMB2)
15723 CLEAR_HARD_REG_SET (regset);
15724 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15725 if (tmp == NULL_RTX)
15726 return false;
15728 /* Use the new register in the first load to ensure that
15729 if the original input register is not dead after peephole,
15730 then it will have the correct constant value. */
15731 operands[0] = tmp;
15733 else if (TARGET_ARM)
15735 return false;
15736 int regno = REGNO (operands[0]);
15737 if (!peep2_reg_dead_p (4, operands[0]))
15739 /* When the input register is even and is not dead after the
15740 pattern, it has to hold the second constant but we cannot
15741 form a legal STRD in ARM mode with this register as the second
15742 register. */
15743 if (regno % 2 == 0)
15744 return false;
15746 /* Is regno-1 free? */
15747 SET_HARD_REG_SET (regset);
15748 CLEAR_HARD_REG_BIT(regset, regno - 1);
15749 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15750 if (tmp == NULL_RTX)
15751 return false;
15753 operands[0] = tmp;
15755 else
15757 /* Find a DImode register. */
15758 CLEAR_HARD_REG_SET (regset);
15759 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15760 if (tmp != NULL_RTX)
15762 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15763 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15765 else
15767 /* Can we use the input register to form a DI register? */
15768 SET_HARD_REG_SET (regset);
15769 CLEAR_HARD_REG_BIT(regset,
15770 regno % 2 == 0 ? regno + 1 : regno - 1);
15771 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15772 if (tmp == NULL_RTX)
15773 return false;
15774 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15778 gcc_assert (operands[0] != NULL_RTX);
15779 gcc_assert (operands[1] != NULL_RTX);
15780 gcc_assert (REGNO (operands[0]) % 2 == 0);
15781 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15785 /* Make sure the instructions are ordered with lower memory access first. */
15786 if (offsets[0] > offsets[1])
15788 gap = offsets[0] - offsets[1];
15789 offset = offsets[1];
15791 /* Swap the instructions such that lower memory is accessed first. */
15792 std::swap (operands[0], operands[1]);
15793 std::swap (operands[2], operands[3]);
15794 if (const_store)
15795 std::swap (operands[4], operands[5]);
15797 else
15799 gap = offsets[1] - offsets[0];
15800 offset = offsets[0];
15803 /* Make sure accesses are to consecutive memory locations. */
15804 if (gap != 4)
15805 return false;
15807 /* Make sure we generate legal instructions. */
15808 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15809 false, load))
15810 return true;
15812 /* In Thumb state, where registers are almost unconstrained, there
15813 is little hope to fix it. */
15814 if (TARGET_THUMB2)
15815 return false;
15817 if (load && commute)
15819 /* Try reordering registers. */
15820 std::swap (operands[0], operands[1]);
15821 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15822 false, load))
15823 return true;
15826 if (const_store)
15828 /* If input registers are dead after this pattern, they can be
15829 reordered or replaced by other registers that are free in the
15830 current pattern. */
15831 if (!peep2_reg_dead_p (4, operands[0])
15832 || !peep2_reg_dead_p (4, operands[1]))
15833 return false;
15835 /* Try to reorder the input registers. */
15836 /* For example, the code
15837 mov r0, 0
15838 mov r1, 1
15839 str r1, [r2]
15840 str r0, [r2, #4]
15841 can be transformed into
15842 mov r1, 0
15843 mov r0, 1
15844 strd r0, [r2]
15846 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15847 false, false))
15849 std::swap (operands[0], operands[1]);
15850 return true;
15853 /* Try to find a free DI register. */
15854 CLEAR_HARD_REG_SET (regset);
15855 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15856 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15857 while (true)
15859 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15860 if (tmp == NULL_RTX)
15861 return false;
15863 /* DREG must be an even-numbered register in DImode.
15864 Split it into SI registers. */
15865 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15866 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15867 gcc_assert (operands[0] != NULL_RTX);
15868 gcc_assert (operands[1] != NULL_RTX);
15869 gcc_assert (REGNO (operands[0]) % 2 == 0);
15870 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15872 return (operands_ok_ldrd_strd (operands[0], operands[1],
15873 base, offset,
15874 false, load));
15878 return false;
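/* Illustrative sketch (not part of the original sources): how a peephole2
   would typically invoke the routine above after matching two adjacent
   SImode loads.  The real callers live in the machine description (the
   ldrd/strd peepholes); this wrapper and its name are assumptions made
   only to show the calling convention.  */
static bool ATTRIBUTE_UNUSED
example_try_gen_ldrd (rtx *operands)
{
  /* operands[0..1] are the destination registers and operands[2..3] the
     matched memory operands; LOAD is true, there is no constant store,
     and COMMUTE allows the register operands to be swapped to form a
     legal register pair.  */
  return gen_operands_ldrd_strd (operands, /*load=*/true,
                                 /*const_store=*/false, /*commute=*/true);
}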
15884 /* Print a symbolic form of X to the debug file, F. */
15885 static void
15886 arm_print_value (FILE *f, rtx x)
15888 switch (GET_CODE (x))
15890 case CONST_INT:
15891 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15892 return;
15894 case CONST_DOUBLE:
15895 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15896 return;
15898 case CONST_VECTOR:
15900 int i;
15902 fprintf (f, "<");
15903 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15905 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15906 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15907 fputc (',', f);
15909 fprintf (f, ">");
15911 return;
15913 case CONST_STRING:
15914 fprintf (f, "\"%s\"", XSTR (x, 0));
15915 return;
15917 case SYMBOL_REF:
15918 fprintf (f, "`%s'", XSTR (x, 0));
15919 return;
15921 case LABEL_REF:
15922 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15923 return;
15925 case CONST:
15926 arm_print_value (f, XEXP (x, 0));
15927 return;
15929 case PLUS:
15930 arm_print_value (f, XEXP (x, 0));
15931 fprintf (f, "+");
15932 arm_print_value (f, XEXP (x, 1));
15933 return;
15935 case PC:
15936 fprintf (f, "pc");
15937 return;
15939 default:
15940 fprintf (f, "????");
15941 return;
15945 /* Routines for manipulation of the constant pool. */
15947 /* Arm instructions cannot load a large constant directly into a
15948 register; they have to come from a pc relative load. The constant
15949 must therefore be placed in the addressable range of the pc
15950 relative load. Depending on the precise pc relative load
15951 instruction the range is somewhere between 256 bytes and 4k. This
15952 means that we often have to dump a constant inside a function, and
15953 generate code to branch around it.
15955 It is important to minimize this, since the branches will slow
15956 things down and make the code larger.
15958 Normally we can hide the table after an existing unconditional
15959 branch so that there is no interruption of the flow, but in the
15960 worst case the code looks like this:
15962 ldr rn, L1
15964 b L2
15965 align
15966 L1: .long value
15970 ldr rn, L3
15972 b L4
15973 align
15974 L3: .long value
15978 We fix this by performing a scan after scheduling, which notices
15979 which instructions need to have their operands fetched from the
15980 constant table and builds the table.
15982 The algorithm starts by building a table of all the constants that
15983 need fixing up and all the natural barriers in the function (places
15984 where a constant table can be dropped without breaking the flow).
15985 For each fixup we note how far the pc-relative replacement will be
15986 able to reach and the offset of the instruction into the function.
15988 Having built the table we then group the fixes together to form
15989 tables that are as large as possible (subject to addressing
15990 constraints) and emit each table of constants after the last
15991 barrier that is within range of all the instructions in the group.
15992 If a group does not contain a barrier, then we forcibly create one
15993 by inserting a jump instruction into the flow. Once the table has
15994 been inserted, the insns are then modified to reference the
15995 relevant entry in the pool.
15997 Possible enhancements to the algorithm (not implemented) are:
15999 1) For some processors and object formats, there may be benefit in
16000 aligning the pools to the start of cache lines; this alignment
16001 would need to be taken into account when calculating addressability
16002 of a pool. */
16004 /* These typedefs are located at the start of this file, so that
16005 they can be used in the prototypes there. This comment is to
16006 remind readers of that fact so that the following structures
16007 can be understood more easily.
16009 typedef struct minipool_node Mnode;
16010 typedef struct minipool_fixup Mfix; */
16012 struct minipool_node
16014 /* Doubly linked chain of entries. */
16015 Mnode * next;
16016 Mnode * prev;
16017 /* The maximum offset into the code at which this entry can be placed. While
16018 pushing fixes for forward references, all entries are sorted in order
16019 of increasing max_address. */
16020 HOST_WIDE_INT max_address;
16021 /* Similarly for an entry inserted for a backwards ref. */
16022 HOST_WIDE_INT min_address;
16023 /* The number of fixes referencing this entry. This can become zero
16024 if we "unpush" an entry. In this case we ignore the entry when we
16025 come to emit the code. */
16026 int refcount;
16027 /* The offset from the start of the minipool. */
16028 HOST_WIDE_INT offset;
16029 /* The value in the table. */
16030 rtx value;
16031 /* The mode of value. */
16032 machine_mode mode;
16033 /* The size of the value. With iWMMXt enabled
16034 sizes > 4 also imply an alignment of 8 bytes. */
16035 int fix_size;
16038 struct minipool_fixup
16040 Mfix * next;
16041 rtx_insn * insn;
16042 HOST_WIDE_INT address;
16043 rtx * loc;
16044 machine_mode mode;
16045 int fix_size;
16046 rtx value;
16047 Mnode * minipool;
16048 HOST_WIDE_INT forwards;
16049 HOST_WIDE_INT backwards;
16052 /* Fixes less than a word need padding out to a word boundary. */
16053 #define MINIPOOL_FIX_SIZE(mode) \
16054 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
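/* Illustrative sketch (not part of the original sources): the padding
   rule above means a QImode or HImode constant still occupies a full word
   in the pool, while wider constants keep their natural size.  The helper
   exists only to spell out a few values.  */
static void ATTRIBUTE_UNUSED
example_minipool_fix_sizes (void)
{
  gcc_assert (MINIPOOL_FIX_SIZE (HImode) == 4);
  gcc_assert (MINIPOOL_FIX_SIZE (SImode) == 4);
  gcc_assert (MINIPOOL_FIX_SIZE (DImode) == 8);
}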
16056 static Mnode * minipool_vector_head;
16057 static Mnode * minipool_vector_tail;
16058 static rtx_code_label *minipool_vector_label;
16059 static int minipool_pad;
16061 /* The linked list of all minipool fixes required for this function. */
16062 Mfix * minipool_fix_head;
16063 Mfix * minipool_fix_tail;
16064 /* The fix entry for the current minipool, once it has been placed. */
16065 Mfix * minipool_barrier;
16067 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16068 #define JUMP_TABLES_IN_TEXT_SECTION 0
16069 #endif
16071 static HOST_WIDE_INT
16072 get_jump_table_size (rtx_jump_table_data *insn)
16074 /* ADDR_VECs only take room if read-only data goes into the text
16075 section. */
16076 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16078 rtx body = PATTERN (insn);
16079 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16080 HOST_WIDE_INT size;
16081 HOST_WIDE_INT modesize;
16083 modesize = GET_MODE_SIZE (GET_MODE (body));
16084 size = modesize * XVECLEN (body, elt);
16085 switch (modesize)
16087 case 1:
16088 /* Round up size of TBB table to a halfword boundary. */
16089 size = (size + 1) & ~(HOST_WIDE_INT)1;
16090 break;
16091 case 2:
16092 /* No padding necessary for TBH. */
16093 break;
16094 case 4:
16095 /* Add two bytes for alignment on Thumb. */
16096 if (TARGET_THUMB)
16097 size += 2;
16098 break;
16099 default:
16100 gcc_unreachable ();
16102 return size;
16105 return 0;
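/* Illustrative sketch (not part of the original sources): for a Thumb-2
   TBB table the entries are single bytes, so a 5-entry table is rounded
   up to 6 bytes to keep the following instruction halfword aligned; TBH
   tables need no rounding, and word-entry tables get 2 extra bytes on
   Thumb for alignment.  The helper repeats only the byte-table rounding
   rule and is invented for illustration.  */
static HOST_WIDE_INT ATTRIBUTE_UNUSED
example_tbb_table_size (HOST_WIDE_INT nentries)
{
  HOST_WIDE_INT size = 1 * nentries;            /* one byte per entry */
  return (size + 1) & ~(HOST_WIDE_INT) 1;       /* round up to a halfword */
}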
16108 /* Return the maximum amount of padding that will be inserted before
16109 label LABEL. */
16111 static HOST_WIDE_INT
16112 get_label_padding (rtx label)
16114 HOST_WIDE_INT align, min_insn_size;
16116 align = 1 << label_to_alignment (label);
16117 min_insn_size = TARGET_THUMB ? 2 : 4;
16118 return align > min_insn_size ? align - min_insn_size : 0;
16121 /* Move a minipool fix MP from its current location to before MAX_MP.
16122 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16123 constraints may need updating. */
16124 static Mnode *
16125 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16126 HOST_WIDE_INT max_address)
16128 /* The code below assumes these are different. */
16129 gcc_assert (mp != max_mp);
16131 if (max_mp == NULL)
16133 if (max_address < mp->max_address)
16134 mp->max_address = max_address;
16136 else
16138 if (max_address > max_mp->max_address - mp->fix_size)
16139 mp->max_address = max_mp->max_address - mp->fix_size;
16140 else
16141 mp->max_address = max_address;
16143 /* Unlink MP from its current position. Since max_mp is non-null,
16144 mp->prev must be non-null. */
16145 mp->prev->next = mp->next;
16146 if (mp->next != NULL)
16147 mp->next->prev = mp->prev;
16148 else
16149 minipool_vector_tail = mp->prev;
16151 /* Re-insert it before MAX_MP. */
16152 mp->next = max_mp;
16153 mp->prev = max_mp->prev;
16154 max_mp->prev = mp;
16156 if (mp->prev != NULL)
16157 mp->prev->next = mp;
16158 else
16159 minipool_vector_head = mp;
16162 /* Save the new entry. */
16163 max_mp = mp;
16165 /* Scan over the preceding entries and adjust their addresses as
16166 required. */
16167 while (mp->prev != NULL
16168 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16170 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16171 mp = mp->prev;
16174 return max_mp;
16177 /* Add a constant to the minipool for a forward reference. Returns the
16178 node added or NULL if the constant will not fit in this pool. */
16179 static Mnode *
16180 add_minipool_forward_ref (Mfix *fix)
16182 /* If set, max_mp is the first pool_entry that has a lower
16183 constraint than the one we are trying to add. */
16184 Mnode * max_mp = NULL;
16185 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16186 Mnode * mp;
16188 /* If the minipool starts before the end of FIX->INSN then this FIX
16189 cannot be placed into the current pool. Furthermore, adding the
16190 new constant pool entry may cause the pool to start FIX_SIZE bytes
16191 earlier. */
16192 if (minipool_vector_head &&
16193 (fix->address + get_attr_length (fix->insn)
16194 >= minipool_vector_head->max_address - fix->fix_size))
16195 return NULL;
16197 /* Scan the pool to see if a constant with the same value has
16198 already been added. While we are doing this, also note the
16199 location where we must insert the constant if it doesn't already
16200 exist. */
16201 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16203 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16204 && fix->mode == mp->mode
16205 && (!LABEL_P (fix->value)
16206 || (CODE_LABEL_NUMBER (fix->value)
16207 == CODE_LABEL_NUMBER (mp->value)))
16208 && rtx_equal_p (fix->value, mp->value))
16210 /* More than one fix references this entry. */
16211 mp->refcount++;
16212 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16215 /* Note the insertion point if necessary. */
16216 if (max_mp == NULL
16217 && mp->max_address > max_address)
16218 max_mp = mp;
16220 /* If we are inserting an 8-byte aligned quantity and
16221 we have not already found an insertion point, then
16222 make sure that all such 8-byte aligned quantities are
16223 placed at the start of the pool. */
16224 if (ARM_DOUBLEWORD_ALIGN
16225 && max_mp == NULL
16226 && fix->fix_size >= 8
16227 && mp->fix_size < 8)
16229 max_mp = mp;
16230 max_address = mp->max_address;
16234 /* The value is not currently in the minipool, so we need to create
16235 a new entry for it. If MAX_MP is NULL, the entry will be put on
16236 the end of the list since the placement is less constrained than
16237 any existing entry. Otherwise, we insert the new fix before
16238 MAX_MP and, if necessary, adjust the constraints on the other
16239 entries. */
16240 mp = XNEW (Mnode);
16241 mp->fix_size = fix->fix_size;
16242 mp->mode = fix->mode;
16243 mp->value = fix->value;
16244 mp->refcount = 1;
16245 /* Not yet required for a backwards ref. */
16246 mp->min_address = -65536;
16248 if (max_mp == NULL)
16250 mp->max_address = max_address;
16251 mp->next = NULL;
16252 mp->prev = minipool_vector_tail;
16254 if (mp->prev == NULL)
16256 minipool_vector_head = mp;
16257 minipool_vector_label = gen_label_rtx ();
16259 else
16260 mp->prev->next = mp;
16262 minipool_vector_tail = mp;
16264 else
16266 if (max_address > max_mp->max_address - mp->fix_size)
16267 mp->max_address = max_mp->max_address - mp->fix_size;
16268 else
16269 mp->max_address = max_address;
16271 mp->next = max_mp;
16272 mp->prev = max_mp->prev;
16273 max_mp->prev = mp;
16274 if (mp->prev != NULL)
16275 mp->prev->next = mp;
16276 else
16277 minipool_vector_head = mp;
16280 /* Save the new entry. */
16281 max_mp = mp;
16283 /* Scan over the preceding entries and adjust their addresses as
16284 required. */
16285 while (mp->prev != NULL
16286 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16288 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16289 mp = mp->prev;
16292 return max_mp;
16295 static Mnode *
16296 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16297 HOST_WIDE_INT min_address)
16299 HOST_WIDE_INT offset;
16301 /* The code below assumes these are different. */
16302 gcc_assert (mp != min_mp);
16304 if (min_mp == NULL)
16306 if (min_address > mp->min_address)
16307 mp->min_address = min_address;
16309 else
16311 /* We will adjust this below if it is too loose. */
16312 mp->min_address = min_address;
16314 /* Unlink MP from its current position. Since min_mp is non-null,
16315 mp->next must be non-null. */
16316 mp->next->prev = mp->prev;
16317 if (mp->prev != NULL)
16318 mp->prev->next = mp->next;
16319 else
16320 minipool_vector_head = mp->next;
16322 /* Reinsert it after MIN_MP. */
16323 mp->prev = min_mp;
16324 mp->next = min_mp->next;
16325 min_mp->next = mp;
16326 if (mp->next != NULL)
16327 mp->next->prev = mp;
16328 else
16329 minipool_vector_tail = mp;
16332 min_mp = mp;
16334 offset = 0;
16335 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16337 mp->offset = offset;
16338 if (mp->refcount > 0)
16339 offset += mp->fix_size;
16341 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16342 mp->next->min_address = mp->min_address + mp->fix_size;
16345 return min_mp;
16348 /* Add a constant to the minipool for a backward reference. Returns the
16349 node added or NULL if the constant will not fit in this pool.
16351 Note that the code for insertion for a backwards reference can be
16352 somewhat confusing because the calculated offsets for each fix do
16353 not take into account the size of the pool (which is still under
16354 construction). */
16355 static Mnode *
16356 add_minipool_backward_ref (Mfix *fix)
16358 /* If set, min_mp is the last pool_entry that has a lower constraint
16359 than the one we are trying to add. */
16360 Mnode *min_mp = NULL;
16361 /* This can be negative, since it is only a constraint. */
16362 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16363 Mnode *mp;
16365 /* If we can't reach the current pool from this insn, or if we can't
16366 insert this entry at the end of the pool without pushing other
16367 fixes out of range, then we don't try. This ensures that we
16368 can't fail later on. */
16369 if (min_address >= minipool_barrier->address
16370 || (minipool_vector_tail->min_address + fix->fix_size
16371 >= minipool_barrier->address))
16372 return NULL;
16374 /* Scan the pool to see if a constant with the same value has
16375 already been added. While we are doing this, also note the
16376 location where we must insert the constant if it doesn't already
16377 exist. */
16378 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16380 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16381 && fix->mode == mp->mode
16382 && (!LABEL_P (fix->value)
16383 || (CODE_LABEL_NUMBER (fix->value)
16384 == CODE_LABEL_NUMBER (mp->value)))
16385 && rtx_equal_p (fix->value, mp->value)
16386 /* Check that there is enough slack to move this entry to the
16387 end of the table (this is conservative). */
16388 && (mp->max_address
16389 > (minipool_barrier->address
16390 + minipool_vector_tail->offset
16391 + minipool_vector_tail->fix_size)))
16393 mp->refcount++;
16394 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16397 if (min_mp != NULL)
16398 mp->min_address += fix->fix_size;
16399 else
16401 /* Note the insertion point if necessary. */
16402 if (mp->min_address < min_address)
16404 /* For now, we do not allow the insertion of nodes requiring 8-byte
16405 alignment anywhere but at the start of the pool. */
16406 if (ARM_DOUBLEWORD_ALIGN
16407 && fix->fix_size >= 8 && mp->fix_size < 8)
16408 return NULL;
16409 else
16410 min_mp = mp;
16412 else if (mp->max_address
16413 < minipool_barrier->address + mp->offset + fix->fix_size)
16415 /* Inserting before this entry would push the fix beyond
16416 its maximum address (which can happen if we have
16417 re-located a forwards fix); force the new fix to come
16418 after it. */
16419 if (ARM_DOUBLEWORD_ALIGN
16420 && fix->fix_size >= 8 && mp->fix_size < 8)
16421 return NULL;
16422 else
16424 min_mp = mp;
16425 min_address = mp->min_address + fix->fix_size;
16428 /* Do not insert a non-8-byte aligned quantity before 8-byte
16429 aligned quantities. */
16430 else if (ARM_DOUBLEWORD_ALIGN
16431 && fix->fix_size < 8
16432 && mp->fix_size >= 8)
16434 min_mp = mp;
16435 min_address = mp->min_address + fix->fix_size;
16440 /* We need to create a new entry. */
16441 mp = XNEW (Mnode);
16442 mp->fix_size = fix->fix_size;
16443 mp->mode = fix->mode;
16444 mp->value = fix->value;
16445 mp->refcount = 1;
16446 mp->max_address = minipool_barrier->address + 65536;
16448 mp->min_address = min_address;
16450 if (min_mp == NULL)
16452 mp->prev = NULL;
16453 mp->next = minipool_vector_head;
16455 if (mp->next == NULL)
16457 minipool_vector_tail = mp;
16458 minipool_vector_label = gen_label_rtx ();
16460 else
16461 mp->next->prev = mp;
16463 minipool_vector_head = mp;
16465 else
16467 mp->next = min_mp->next;
16468 mp->prev = min_mp;
16469 min_mp->next = mp;
16471 if (mp->next != NULL)
16472 mp->next->prev = mp;
16473 else
16474 minipool_vector_tail = mp;
16477 /* Save the new entry. */
16478 min_mp = mp;
16480 if (mp->prev)
16481 mp = mp->prev;
16482 else
16483 mp->offset = 0;
16485 /* Scan over the following entries and adjust their offsets. */
16486 while (mp->next != NULL)
16488 if (mp->next->min_address < mp->min_address + mp->fix_size)
16489 mp->next->min_address = mp->min_address + mp->fix_size;
16491 if (mp->refcount)
16492 mp->next->offset = mp->offset + mp->fix_size;
16493 else
16494 mp->next->offset = mp->offset;
16496 mp = mp->next;
16499 return min_mp;
16502 static void
16503 assign_minipool_offsets (Mfix *barrier)
16505 HOST_WIDE_INT offset = 0;
16506 Mnode *mp;
16508 minipool_barrier = barrier;
16510 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16512 mp->offset = offset;
16514 if (mp->refcount > 0)
16515 offset += mp->fix_size;
16519 /* Output the literal table */
16520 static void
16521 dump_minipool (rtx_insn *scan)
16523 Mnode * mp;
16524 Mnode * nmp;
16525 int align64 = 0;
16527 if (ARM_DOUBLEWORD_ALIGN)
16528 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16529 if (mp->refcount > 0 && mp->fix_size >= 8)
16531 align64 = 1;
16532 break;
16535 if (dump_file)
16536 fprintf (dump_file,
16537 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16538 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16540 scan = emit_label_after (gen_label_rtx (), scan);
16541 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16542 scan = emit_label_after (minipool_vector_label, scan);
16544 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16546 if (mp->refcount > 0)
16548 if (dump_file)
16550 fprintf (dump_file,
16551 ";; Offset %u, min %ld, max %ld ",
16552 (unsigned) mp->offset, (unsigned long) mp->min_address,
16553 (unsigned long) mp->max_address);
16554 arm_print_value (dump_file, mp->value);
16555 fputc ('\n', dump_file);
16558 switch (GET_MODE_SIZE (mp->mode))
16560 #ifdef HAVE_consttable_1
16561 case 1:
16562 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16563 break;
16565 #endif
16566 #ifdef HAVE_consttable_2
16567 case 2:
16568 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16569 break;
16571 #endif
16572 #ifdef HAVE_consttable_4
16573 case 4:
16574 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16575 break;
16577 #endif
16578 #ifdef HAVE_consttable_8
16579 case 8:
16580 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16581 break;
16583 #endif
16584 #ifdef HAVE_consttable_16
16585 case 16:
16586 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16587 break;
16589 #endif
16590 default:
16591 gcc_unreachable ();
16595 nmp = mp->next;
16596 free (mp);
16599 minipool_vector_head = minipool_vector_tail = NULL;
16600 scan = emit_insn_after (gen_consttable_end (), scan);
16601 scan = emit_barrier_after (scan);
16604 /* Return the cost of forcibly inserting a barrier after INSN. */
16605 static int
16606 arm_barrier_cost (rtx_insn *insn)
16608 /* Basing the location of the pool on the loop depth is preferable,
16609 but at the moment, the basic block information seems to be
16610 corrupted by this stage of the compilation. */
16611 int base_cost = 50;
16612 rtx_insn *next = next_nonnote_insn (insn);
16614 if (next != NULL && LABEL_P (next))
16615 base_cost -= 20;
16617 switch (GET_CODE (insn))
16619 case CODE_LABEL:
16620 /* It will always be better to place the table before the label, rather
16621 than after it. */
16622 return 50;
16624 case INSN:
16625 case CALL_INSN:
16626 return base_cost;
16628 case JUMP_INSN:
16629 return base_cost - 10;
16631 default:
16632 return base_cost + 10;
16636 /* Find the best place in the insn stream in the range
16637 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16638 Create the barrier by inserting a jump and add a new fix entry for
16639 it. */
16640 static Mfix *
16641 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16643 HOST_WIDE_INT count = 0;
16644 rtx_barrier *barrier;
16645 rtx_insn *from = fix->insn;
16646 /* The instruction after which we will insert the jump. */
16647 rtx_insn *selected = NULL;
16648 int selected_cost;
16649 /* The address at which the jump instruction will be placed. */
16650 HOST_WIDE_INT selected_address;
16651 Mfix * new_fix;
16652 HOST_WIDE_INT max_count = max_address - fix->address;
16653 rtx_code_label *label = gen_label_rtx ();
16655 selected_cost = arm_barrier_cost (from);
16656 selected_address = fix->address;
16658 while (from && count < max_count)
16660 rtx_jump_table_data *tmp;
16661 int new_cost;
16663 /* This code shouldn't have been called if there was a natural barrier
16664 within range. */
16665 gcc_assert (!BARRIER_P (from));
16667 /* Count the length of this insn. This must stay in sync with the
16668 code that pushes minipool fixes. */
16669 if (LABEL_P (from))
16670 count += get_label_padding (from);
16671 else
16672 count += get_attr_length (from);
16674 /* If there is a jump table, add its length. */
16675 if (tablejump_p (from, NULL, &tmp))
16677 count += get_jump_table_size (tmp);
16679 /* Jump tables aren't in a basic block, so base the cost on
16680 the dispatch insn. If we select this location, we will
16681 still put the pool after the table. */
16682 new_cost = arm_barrier_cost (from);
16684 if (count < max_count
16685 && (!selected || new_cost <= selected_cost))
16687 selected = tmp;
16688 selected_cost = new_cost;
16689 selected_address = fix->address + count;
16692 /* Continue after the dispatch table. */
16693 from = NEXT_INSN (tmp);
16694 continue;
16697 new_cost = arm_barrier_cost (from);
16699 if (count < max_count
16700 && (!selected || new_cost <= selected_cost))
16702 selected = from;
16703 selected_cost = new_cost;
16704 selected_address = fix->address + count;
16707 from = NEXT_INSN (from);
16710 /* Make sure that we found a place to insert the jump. */
16711 gcc_assert (selected);
16713 /* Make sure we do not split a call and its corresponding
16714 CALL_ARG_LOCATION note. */
16715 if (CALL_P (selected))
16717 rtx_insn *next = NEXT_INSN (selected);
16718 if (next && NOTE_P (next)
16719 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16720 selected = next;
16723 /* Create a new JUMP_INSN that branches around a barrier. */
16724 from = emit_jump_insn_after (gen_jump (label), selected);
16725 JUMP_LABEL (from) = label;
16726 barrier = emit_barrier_after (from);
16727 emit_label_after (label, barrier);
16729 /* Create a minipool barrier entry for the new barrier. */
16730 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16731 new_fix->insn = barrier;
16732 new_fix->address = selected_address;
16733 new_fix->next = fix->next;
16734 fix->next = new_fix;
16736 return new_fix;
16739 /* Record that there is a natural barrier in the insn stream at
16740 ADDRESS. */
16741 static void
16742 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16744 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16746 fix->insn = insn;
16747 fix->address = address;
16749 fix->next = NULL;
16750 if (minipool_fix_head != NULL)
16751 minipool_fix_tail->next = fix;
16752 else
16753 minipool_fix_head = fix;
16755 minipool_fix_tail = fix;
16758 /* Record INSN, which will need fixing up to load a value from the
16759 minipool. ADDRESS is the offset of the insn since the start of the
16760 function; LOC is a pointer to the part of the insn which requires
16761 fixing; VALUE is the constant that must be loaded, which is of type
16762 MODE. */
16763 static void
16764 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16765 machine_mode mode, rtx value)
16767 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16769 fix->insn = insn;
16770 fix->address = address;
16771 fix->loc = loc;
16772 fix->mode = mode;
16773 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16774 fix->value = value;
16775 fix->forwards = get_attr_pool_range (insn);
16776 fix->backwards = get_attr_neg_pool_range (insn);
16777 fix->minipool = NULL;
16779 /* If an insn doesn't have a range defined for it, then it isn't
16780 expecting to be reworked by this code. Better to stop now than
16781 to generate duff assembly code. */
16782 gcc_assert (fix->forwards || fix->backwards);
16784 /* If an entry requires 8-byte alignment then assume all constant pools
16785 require 4 bytes of padding. Trying to do this later on a per-pool
16786 basis is awkward because existing pool entries have to be modified. */
16787 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16788 minipool_pad = 4;
16790 if (dump_file)
16792 fprintf (dump_file,
16793 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16794 GET_MODE_NAME (mode),
16795 INSN_UID (insn), (unsigned long) address,
16796 -1 * (long)fix->backwards, (long)fix->forwards);
16797 arm_print_value (dump_file, fix->value);
16798 fprintf (dump_file, "\n");
16801 /* Add it to the chain of fixes. */
16802 fix->next = NULL;
16804 if (minipool_fix_head != NULL)
16805 minipool_fix_tail->next = fix;
16806 else
16807 minipool_fix_head = fix;
16809 minipool_fix_tail = fix;
16812 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16813 Returns the number of insns needed, or 99 if we always want to synthesize
16814 the value. */
16815 int
16816 arm_max_const_double_inline_cost ()
16818 /* Let the value get synthesized to avoid the use of literal pools. */
16819 if (arm_disable_literal_pool)
16820 return 99;
16822 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16825 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16826 Returns the number of insns needed, or 99 if we don't know how to
16827 do it. */
16828 int
16829 arm_const_double_inline_cost (rtx val)
16831 rtx lowpart, highpart;
16832 machine_mode mode;
16834 mode = GET_MODE (val);
16836 if (mode == VOIDmode)
16837 mode = DImode;
16839 gcc_assert (GET_MODE_SIZE (mode) == 8);
16841 lowpart = gen_lowpart (SImode, val);
16842 highpart = gen_highpart_mode (SImode, mode, val);
16844 gcc_assert (CONST_INT_P (lowpart));
16845 gcc_assert (CONST_INT_P (highpart));
16847 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16848 NULL_RTX, NULL_RTX, 0, 0)
16849 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16850 NULL_RTX, NULL_RTX, 0, 0));
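/* Illustrative sketch (not part of the original sources): the cost above
   is simply the sum of the costs of synthesizing each 32-bit half.  For
   example, a DImode value such as 0x0000002a000000ff splits into a low
   part of 0xff and a high part of 0x2a, both valid ARM immediates, so the
   whole constant costs two insns.  The helper below only shows the
   splitting step; its name is invented for this example.  */
static void ATTRIBUTE_UNUSED
example_split_di_constant (rtx val, HOST_WIDE_INT *low, HOST_WIDE_INT *high)
{
  machine_mode mode = GET_MODE (val);
  if (mode == VOIDmode)
    mode = DImode;

  /* Split exactly as arm_const_double_inline_cost does before costing.  */
  *low = INTVAL (gen_lowpart (SImode, val));
  *high = INTVAL (gen_highpart_mode (SImode, mode, val));
}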
16853 /* Cost of loading a SImode constant. */
16854 static inline int
16855 arm_const_inline_cost (enum rtx_code code, rtx val)
16857 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16858 NULL_RTX, NULL_RTX, 1, 0);
16861 /* Return true if it is worthwhile to split a 64-bit constant into two
16862 32-bit operations. This is the case if optimizing for size, or
16863 if we have load delay slots, or if one 32-bit part can be done with
16864 a single data operation. */
16865 bool
16866 arm_const_double_by_parts (rtx val)
16868 machine_mode mode = GET_MODE (val);
16869 rtx part;
16871 if (optimize_size || arm_ld_sched)
16872 return true;
16874 if (mode == VOIDmode)
16875 mode = DImode;
16877 part = gen_highpart_mode (SImode, mode, val);
16879 gcc_assert (CONST_INT_P (part));
16881 if (const_ok_for_arm (INTVAL (part))
16882 || const_ok_for_arm (~INTVAL (part)))
16883 return true;
16885 part = gen_lowpart (SImode, val);
16887 gcc_assert (CONST_INT_P (part));
16889 if (const_ok_for_arm (INTVAL (part))
16890 || const_ok_for_arm (~INTVAL (part)))
16891 return true;
16893 return false;
16896 /* Return true if it is possible to inline both the high and low parts
16897 of a 64-bit constant into 32-bit data processing instructions. */
16898 bool
16899 arm_const_double_by_immediates (rtx val)
16901 machine_mode mode = GET_MODE (val);
16902 rtx part;
16904 if (mode == VOIDmode)
16905 mode = DImode;
16907 part = gen_highpart_mode (SImode, mode, val);
16909 gcc_assert (CONST_INT_P (part));
16911 if (!const_ok_for_arm (INTVAL (part)))
16912 return false;
16914 part = gen_lowpart (SImode, val);
16916 gcc_assert (CONST_INT_P (part));
16918 if (!const_ok_for_arm (INTVAL (part)))
16919 return false;
16921 return true;
16924 /* Scan INSN and note any of its operands that need fixing.
16925 If DO_PUSHES is false we do not actually push any of the fixups
16926 needed. */
16927 static void
16928 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16930 int opno;
16932 extract_constrain_insn (insn);
16934 if (recog_data.n_alternatives == 0)
16935 return;
16937 /* Fill in recog_op_alt with information about the constraints of
16938 this insn. */
16939 preprocess_constraints (insn);
16941 const operand_alternative *op_alt = which_op_alt ();
16942 for (opno = 0; opno < recog_data.n_operands; opno++)
16944 /* Things we need to fix can only occur in inputs. */
16945 if (recog_data.operand_type[opno] != OP_IN)
16946 continue;
16948 /* If this alternative is a memory reference, then any mention
16949 of constants in this alternative is really to fool reload
16950 into allowing us to accept one there. We need to fix them up
16951 now so that we output the right code. */
16952 if (op_alt[opno].memory_ok)
16954 rtx op = recog_data.operand[opno];
16956 if (CONSTANT_P (op))
16958 if (do_pushes)
16959 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16960 recog_data.operand_mode[opno], op);
16962 else if (MEM_P (op)
16963 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16964 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16966 if (do_pushes)
16968 rtx cop = avoid_constant_pool_reference (op);
16970 /* Casting the address of something to a mode narrower
16971 than a word can cause avoid_constant_pool_reference()
16972 to return the pool reference itself. That's no good to
16973 us here. Let's just hope that we can use the
16974 constant pool value directly. */
16975 if (op == cop)
16976 cop = get_pool_constant (XEXP (op, 0));
16978 push_minipool_fix (insn, address,
16979 recog_data.operand_loc[opno],
16980 recog_data.operand_mode[opno], cop);
16987 return;
16990 /* Rewrite move insn into subtract of 0 if the condition codes will
16991 be useful in next conditional jump insn. */
16993 static void
16994 thumb1_reorg (void)
16996 basic_block bb;
16998 FOR_EACH_BB_FN (bb, cfun)
17000 rtx dest, src;
17001 rtx pat, op0, set = NULL;
17002 rtx_insn *prev, *insn = BB_END (bb);
17003 bool insn_clobbered = false;
17005 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17006 insn = PREV_INSN (insn);
17008 /* Find the last cbranchsi4_insn in basic block BB. */
17009 if (insn == BB_HEAD (bb)
17010 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17011 continue;
17013 /* Get the register with which we are comparing. */
17014 pat = PATTERN (insn);
17015 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17017 /* Find the first flag setting insn before INSN in basic block BB. */
17018 gcc_assert (insn != BB_HEAD (bb));
17019 for (prev = PREV_INSN (insn);
17020 (!insn_clobbered
17021 && prev != BB_HEAD (bb)
17022 && (NOTE_P (prev)
17023 || DEBUG_INSN_P (prev)
17024 || ((set = single_set (prev)) != NULL
17025 && get_attr_conds (prev) == CONDS_NOCOND)));
17026 prev = PREV_INSN (prev))
17028 if (reg_set_p (op0, prev))
17029 insn_clobbered = true;
17032 /* Skip if op0 is clobbered by insn other than prev. */
17033 if (insn_clobbered)
17034 continue;
17036 if (!set)
17037 continue;
17039 dest = SET_DEST (set);
17040 src = SET_SRC (set);
17041 if (!low_register_operand (dest, SImode)
17042 || !low_register_operand (src, SImode))
17043 continue;
17045 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17046 in INSN. Both src and dest of the move insn are checked. */
17047 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17049 dest = copy_rtx (dest);
17050 src = copy_rtx (src);
17051 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17052 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17053 INSN_CODE (prev) = -1;
17054 /* Set test register in INSN to dest. */
17055 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17056 INSN_CODE (insn) = -1;
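/* Illustrative sketch (not part of the original sources): the rewrite
   performed above turns a plain register move that feeds a compare
   against zero into a flag-setting subtract of zero, i.e.
   (set (reg r3) (reg r2)) becomes (set (reg r3) (minus (reg r2) 0)),
   which can be emitted as "subs r3, r2, #0" and lets the final cbranch
   reuse the condition codes.  The helper name is invented; it only
   mirrors the RTL construction used in thumb1_reorg.  */
static rtx ATTRIBUTE_UNUSED
example_move_as_sub_zero (rtx dest, rtx src)
{
  return gen_rtx_SET (VOIDmode, copy_rtx (dest),
                      gen_rtx_MINUS (SImode, copy_rtx (src), const0_rtx));
}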
17061 /* Convert instructions to their cc-clobbering variant if possible, since
17062 that allows us to use smaller encodings. */
17064 static void
17065 thumb2_reorg (void)
17067 basic_block bb;
17068 regset_head live;
17070 INIT_REG_SET (&live);
17072 /* We are freeing block_for_insn in the toplev to keep compatibility
17073 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17074 compute_bb_for_insn ();
17075 df_analyze ();
17077 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17079 FOR_EACH_BB_FN (bb, cfun)
17081 if (current_tune->disparage_flag_setting_t16_encodings
17082 && optimize_bb_for_speed_p (bb))
17083 continue;
17085 rtx_insn *insn;
17086 Convert_Action action = SKIP;
17087 Convert_Action action_for_partial_flag_setting
17088 = (current_tune->disparage_partial_flag_setting_t16_encodings
17089 && optimize_bb_for_speed_p (bb))
17090 ? SKIP : CONV;
17092 COPY_REG_SET (&live, DF_LR_OUT (bb));
17093 df_simulate_initialize_backwards (bb, &live);
17094 FOR_BB_INSNS_REVERSE (bb, insn)
17096 if (NONJUMP_INSN_P (insn)
17097 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17098 && GET_CODE (PATTERN (insn)) == SET)
17100 action = SKIP;
17101 rtx pat = PATTERN (insn);
17102 rtx dst = XEXP (pat, 0);
17103 rtx src = XEXP (pat, 1);
17104 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17106 if (UNARY_P (src) || BINARY_P (src))
17107 op0 = XEXP (src, 0);
17109 if (BINARY_P (src))
17110 op1 = XEXP (src, 1);
17112 if (low_register_operand (dst, SImode))
17114 switch (GET_CODE (src))
17116 case PLUS:
17117 /* Adding two registers and storing the result
17118 in the first source is already a 16-bit
17119 operation. */
17120 if (rtx_equal_p (dst, op0)
17121 && register_operand (op1, SImode))
17122 break;
17124 if (low_register_operand (op0, SImode))
17126 /* ADDS <Rd>,<Rn>,<Rm> */
17127 if (low_register_operand (op1, SImode))
17128 action = CONV;
17129 /* ADDS <Rdn>,#<imm8> */
17130 /* SUBS <Rdn>,#<imm8> */
17131 else if (rtx_equal_p (dst, op0)
17132 && CONST_INT_P (op1)
17133 && IN_RANGE (INTVAL (op1), -255, 255))
17134 action = CONV;
17135 /* ADDS <Rd>,<Rn>,#<imm3> */
17136 /* SUBS <Rd>,<Rn>,#<imm3> */
17137 else if (CONST_INT_P (op1)
17138 && IN_RANGE (INTVAL (op1), -7, 7))
17139 action = CONV;
17141 /* ADCS <Rd>, <Rn> */
17142 else if (GET_CODE (XEXP (src, 0)) == PLUS
17143 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17144 && low_register_operand (XEXP (XEXP (src, 0), 1),
17145 SImode)
17146 && COMPARISON_P (op1)
17147 && cc_register (XEXP (op1, 0), VOIDmode)
17148 && maybe_get_arm_condition_code (op1) == ARM_CS
17149 && XEXP (op1, 1) == const0_rtx)
17150 action = CONV;
17151 break;
17153 case MINUS:
17154 /* RSBS <Rd>,<Rn>,#0
17155 Not handled here: see NEG below. */
17156 /* SUBS <Rd>,<Rn>,#<imm3>
17157 SUBS <Rdn>,#<imm8>
17158 Not handled here: see PLUS above. */
17159 /* SUBS <Rd>,<Rn>,<Rm> */
17160 if (low_register_operand (op0, SImode)
17161 && low_register_operand (op1, SImode))
17162 action = CONV;
17163 break;
17165 case MULT:
17166 /* MULS <Rdm>,<Rn>,<Rdm>
17167 As an exception to the rule, this is only used
17168 when optimizing for size since MULS is slow on all
17169 known implementations. We do not even want to use
17170 MULS in cold code, if optimizing for speed, so we
17171 test the global flag here. */
17172 if (!optimize_size)
17173 break;
17174 /* else fall through. */
17175 case AND:
17176 case IOR:
17177 case XOR:
17178 /* ANDS <Rdn>,<Rm> */
17179 if (rtx_equal_p (dst, op0)
17180 && low_register_operand (op1, SImode))
17181 action = action_for_partial_flag_setting;
17182 else if (rtx_equal_p (dst, op1)
17183 && low_register_operand (op0, SImode))
17184 action = action_for_partial_flag_setting == SKIP
17185 ? SKIP : SWAP_CONV;
17186 break;
17188 case ASHIFTRT:
17189 case ASHIFT:
17190 case LSHIFTRT:
17191 /* ASRS <Rdn>,<Rm> */
17192 /* LSRS <Rdn>,<Rm> */
17193 /* LSLS <Rdn>,<Rm> */
17194 if (rtx_equal_p (dst, op0)
17195 && low_register_operand (op1, SImode))
17196 action = action_for_partial_flag_setting;
17197 /* ASRS <Rd>,<Rm>,#<imm5> */
17198 /* LSRS <Rd>,<Rm>,#<imm5> */
17199 /* LSLS <Rd>,<Rm>,#<imm5> */
17200 else if (low_register_operand (op0, SImode)
17201 && CONST_INT_P (op1)
17202 && IN_RANGE (INTVAL (op1), 0, 31))
17203 action = action_for_partial_flag_setting;
17204 break;
17206 case ROTATERT:
17207 /* RORS <Rdn>,<Rm> */
17208 if (rtx_equal_p (dst, op0)
17209 && low_register_operand (op1, SImode))
17210 action = action_for_partial_flag_setting;
17211 break;
17213 case NOT:
17214 /* MVNS <Rd>,<Rm> */
17215 if (low_register_operand (op0, SImode))
17216 action = action_for_partial_flag_setting;
17217 break;
17219 case NEG:
17220 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17221 if (low_register_operand (op0, SImode))
17222 action = CONV;
17223 break;
17225 case CONST_INT:
17226 /* MOVS <Rd>,#<imm8> */
17227 if (CONST_INT_P (src)
17228 && IN_RANGE (INTVAL (src), 0, 255))
17229 action = action_for_partial_flag_setting;
17230 break;
17232 case REG:
17233 /* MOVS and MOV<c> with registers have different
17234 encodings, so are not relevant here. */
17235 break;
17237 default:
17238 break;
17242 if (action != SKIP)
17244 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17245 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17246 rtvec vec;
17248 if (action == SWAP_CONV)
17250 src = copy_rtx (src);
17251 XEXP (src, 0) = op1;
17252 XEXP (src, 1) = op0;
17253 pat = gen_rtx_SET (VOIDmode, dst, src);
17254 vec = gen_rtvec (2, pat, clobber);
17256 else /* action == CONV */
17257 vec = gen_rtvec (2, pat, clobber);
17259 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17260 INSN_CODE (insn) = -1;
17264 if (NONDEBUG_INSN_P (insn))
17265 df_simulate_one_insn_backwards (bb, insn, &live);
17269 CLEAR_REG_SET (&live);
17272 /* GCC puts the pool in the wrong place for ARM, since we can only
17273 load addresses a limited distance around the pc. We do some
17274 special munging to move the constant pool values to the correct
17275 point in the code. */
17276 static void
17277 arm_reorg (void)
17279 rtx_insn *insn;
17280 HOST_WIDE_INT address = 0;
17281 Mfix * fix;
17283 if (TARGET_THUMB1)
17284 thumb1_reorg ();
17285 else if (TARGET_THUMB2)
17286 thumb2_reorg ();
17288 /* Ensure all insns that must be split have been split at this point.
17289 Otherwise, the pool placement code below may compute incorrect
17290 insn lengths. Note that when optimizing, all insns have already
17291 been split at this point. */
17292 if (!optimize)
17293 split_all_insns_noflow ();
17295 minipool_fix_head = minipool_fix_tail = NULL;
17297 /* The first insn must always be a note, or the code below won't
17298 scan it properly. */
17299 insn = get_insns ();
17300 gcc_assert (NOTE_P (insn));
17301 minipool_pad = 0;
17303 /* Scan all the insns and record the operands that will need fixing. */
17304 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17306 if (BARRIER_P (insn))
17307 push_minipool_barrier (insn, address);
17308 else if (INSN_P (insn))
17310 rtx_jump_table_data *table;
17312 note_invalid_constants (insn, address, true);
17313 address += get_attr_length (insn);
17315 /* If the insn is a vector jump, add the size of the table
17316 and skip the table. */
17317 if (tablejump_p (insn, NULL, &table))
17319 address += get_jump_table_size (table);
17320 insn = table;
17323 else if (LABEL_P (insn))
17324 /* Add the worst-case padding due to alignment. We don't add
17325 the _current_ padding because the minipool insertions
17326 themselves might change it. */
17327 address += get_label_padding (insn);
17330 fix = minipool_fix_head;
17332 /* Now scan the fixups and perform the required changes. */
17333 while (fix)
17335 Mfix * ftmp;
17336 Mfix * fdel;
17337 Mfix * last_added_fix;
17338 Mfix * last_barrier = NULL;
17339 Mfix * this_fix;
17341 /* Skip any further barriers before the next fix. */
17342 while (fix && BARRIER_P (fix->insn))
17343 fix = fix->next;
17345 /* No more fixes. */
17346 if (fix == NULL)
17347 break;
17349 last_added_fix = NULL;
17351 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17353 if (BARRIER_P (ftmp->insn))
17355 if (ftmp->address >= minipool_vector_head->max_address)
17356 break;
17358 last_barrier = ftmp;
17360 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17361 break;
17363 last_added_fix = ftmp; /* Keep track of the last fix added. */
17366 /* If we found a barrier, drop back to that; any fixes that we
17367 could have reached but come after the barrier will now go in
17368 the next mini-pool. */
17369 if (last_barrier != NULL)
17371 /* Reduce the refcount for those fixes that won't go into this
17372 pool after all. */
17373 for (fdel = last_barrier->next;
17374 fdel && fdel != ftmp;
17375 fdel = fdel->next)
17377 fdel->minipool->refcount--;
17378 fdel->minipool = NULL;
17381 ftmp = last_barrier;
17383 else
17385 /* ftmp is the first fix that we can't fit into this pool and
17386 there are no natural barriers that we could use. Insert a
17387 new barrier in the code somewhere between the previous
17388 fix and this one, and arrange to jump around it. */
17389 HOST_WIDE_INT max_address;
17391 /* The last item on the list of fixes must be a barrier, so
17392 we can never run off the end of the list of fixes without
17393 last_barrier being set. */
17394 gcc_assert (ftmp);
17396 max_address = minipool_vector_head->max_address;
17397 /* Check that there isn't another fix that is in range that
17398 we couldn't fit into this pool because the pool was
17399 already too large: we need to put the pool before such an
17400 instruction. The pool itself may come just after the
17401 fix because create_fix_barrier also allows space for a
17402 jump instruction. */
17403 if (ftmp->address < max_address)
17404 max_address = ftmp->address + 1;
17406 last_barrier = create_fix_barrier (last_added_fix, max_address);
17409 assign_minipool_offsets (last_barrier);
17411 while (ftmp)
17413 if (!BARRIER_P (ftmp->insn)
17414 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17415 == NULL))
17416 break;
17418 ftmp = ftmp->next;
17421 /* Scan over the fixes we have identified for this pool, fixing them
17422 up and adding the constants to the pool itself. */
17423 for (this_fix = fix; this_fix && ftmp != this_fix;
17424 this_fix = this_fix->next)
17425 if (!BARRIER_P (this_fix->insn))
17427 rtx addr
17428 = plus_constant (Pmode,
17429 gen_rtx_LABEL_REF (VOIDmode,
17430 minipool_vector_label),
17431 this_fix->minipool->offset);
17432 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17435 dump_minipool (last_barrier->insn);
17436 fix = ftmp;
17439 /* From now on we must synthesize any constants that we can't handle
17440 directly. This can happen if the RTL gets split during final
17441 instruction generation. */
17442 cfun->machine->after_arm_reorg = 1;
17444 /* Free the minipool memory. */
17445 obstack_free (&minipool_obstack, minipool_startobj);
17448 /* Routines to output assembly language. */
17450 /* Return the string representation of the real value passed in. */
17451 static const char *
17452 fp_const_from_val (REAL_VALUE_TYPE *r)
17454 if (!fp_consts_inited)
17455 init_fp_table ();
17457 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17458 return "0";
17461 /* OPERANDS[0] is the entire list of insns that constitute pop,
17462 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17463 is in the list, UPDATE is true iff the list contains explicit
17464 update of base register. */
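/* As an illustrative sketch (the register choice is made up, not taken from
   a real test case): a pop of {r4, r5, pc} with SP as the base register and
   an explicit update would be printed as
       pop     {r4, r5, pc}
   under unified assembly, or as
       ldmfd   sp!, {r4, r5, pc}
   otherwise; a non-SP base register uses the ldmia form, and a "^" is
   appended when returning from an interrupt handler.  */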
17465 void
17466 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17467 bool update)
17469 int i;
17470 char pattern[100];
17471 int offset;
17472 const char *conditional;
17473 int num_saves = XVECLEN (operands[0], 0);
17474 unsigned int regno;
17475 unsigned int regno_base = REGNO (operands[1]);
17477 offset = 0;
17478 offset += update ? 1 : 0;
17479 offset += return_pc ? 1 : 0;
17481 /* Is the base register in the list? */
17482 for (i = offset; i < num_saves; i++)
17484 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17485 /* If SP is in the list, then the base register must be SP. */
17486 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17487 /* If base register is in the list, there must be no explicit update. */
17488 if (regno == regno_base)
17489 gcc_assert (!update);
17492 conditional = reverse ? "%?%D0" : "%?%d0";
17493 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17495 /* Output pop (not stmfd) because it has a shorter encoding. */
17496 gcc_assert (update);
17497 sprintf (pattern, "pop%s\t{", conditional);
17499 else
17501 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17502 It's just a convention; their semantics are identical. */
17503 if (regno_base == SP_REGNUM)
17504 sprintf (pattern, "ldm%sfd\t", conditional);
17505 else if (TARGET_UNIFIED_ASM)
17506 sprintf (pattern, "ldmia%s\t", conditional);
17507 else
17508 sprintf (pattern, "ldm%sia\t", conditional);
17510 strcat (pattern, reg_names[regno_base]);
17511 if (update)
17512 strcat (pattern, "!, {");
17513 else
17514 strcat (pattern, ", {");
17517 /* Output the first destination register. */
17518 strcat (pattern,
17519 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17521 /* Output the rest of the destination registers. */
17522 for (i = offset + 1; i < num_saves; i++)
17524 strcat (pattern, ", ");
17525 strcat (pattern,
17526 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17529 strcat (pattern, "}");
17531 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17532 strcat (pattern, "^");
17534 output_asm_insn (pattern, &cond);
17538 /* Output the assembly for a store multiple. */
17540 const char *
17541 vfp_output_vstmd (rtx * operands)
17543 char pattern[100];
17544 int p;
17545 int base;
17546 int i;
17547 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17548 ? XEXP (operands[0], 0)
17549 : XEXP (XEXP (operands[0], 0), 0);
17550 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17552 if (push_p)
17553 strcpy (pattern, "vpush%?.64\t{%P1");
17554 else
17555 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17557 p = strlen (pattern);
17559 gcc_assert (REG_P (operands[1]));
17561 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17562 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17564 p += sprintf (&pattern[p], ", d%d", base + i);
17566 strcpy (&pattern[p], "}");
17568 output_asm_insn (pattern, operands);
17569 return "";
17573 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17574 number of bytes pushed. */
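/* Rough worked example (derived from the code below rather than from a
   captured compile): COUNT == 3 emits a single store-multiple that
   pre-decrements SP by 3 * 8 = 24 bytes and returns 24, while COUNT == 2
   on a pre-ARMv6 core is widened to three registers to sidestep the ARM10
   VFPr1 erratum handled below.  */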
17576 static int
17577 vfp_emit_fstmd (int base_reg, int count)
17579 rtx par;
17580 rtx dwarf;
17581 rtx tmp, reg;
17582 int i;
17584 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17585 register pairs are stored by a store multiple insn. We avoid this
17586 by pushing an extra pair. */
17587 if (count == 2 && !arm_arch6)
17589 if (base_reg == LAST_VFP_REGNUM - 3)
17590 base_reg -= 2;
17591 count++;
17594 /* FSTMD may not store more than 16 doubleword registers at once. Split
17595 larger stores into multiple parts (up to a maximum of two, in
17596 practice). */
17597 if (count > 16)
17599 int saved;
17600 /* NOTE: base_reg is an internal register number, so each D register
17601 counts as 2. */
17602 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17603 saved += vfp_emit_fstmd (base_reg, 16);
17604 return saved;
17607 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17608 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17610 reg = gen_rtx_REG (DFmode, base_reg);
17611 base_reg += 2;
17613 XVECEXP (par, 0, 0)
17614 = gen_rtx_SET (VOIDmode,
17615 gen_frame_mem
17616 (BLKmode,
17617 gen_rtx_PRE_MODIFY (Pmode,
17618 stack_pointer_rtx,
17619 plus_constant
17620 (Pmode, stack_pointer_rtx,
17621 - (count * 8)))
17623 gen_rtx_UNSPEC (BLKmode,
17624 gen_rtvec (1, reg),
17625 UNSPEC_PUSH_MULT));
17627 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17628 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17629 RTX_FRAME_RELATED_P (tmp) = 1;
17630 XVECEXP (dwarf, 0, 0) = tmp;
17632 tmp = gen_rtx_SET (VOIDmode,
17633 gen_frame_mem (DFmode, stack_pointer_rtx),
17634 reg);
17635 RTX_FRAME_RELATED_P (tmp) = 1;
17636 XVECEXP (dwarf, 0, 1) = tmp;
17638 for (i = 1; i < count; i++)
17640 reg = gen_rtx_REG (DFmode, base_reg);
17641 base_reg += 2;
17642 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17644 tmp = gen_rtx_SET (VOIDmode,
17645 gen_frame_mem (DFmode,
17646 plus_constant (Pmode,
17647 stack_pointer_rtx,
17648 i * 8)),
17649 reg);
17650 RTX_FRAME_RELATED_P (tmp) = 1;
17651 XVECEXP (dwarf, 0, i + 1) = tmp;
17654 par = emit_insn (par);
17655 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17656 RTX_FRAME_RELATED_P (par) = 1;
17658 return count * 8;
17661 /* Emit a call instruction with pattern PAT. ADDR is the address of
17662 the call target. */
17664 void
17665 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17667 rtx insn;
17669 insn = emit_call_insn (pat);
17671 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17672 If the call might use such an entry, add a use of the PIC register
17673 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17674 if (TARGET_VXWORKS_RTP
17675 && flag_pic
17676 && !sibcall
17677 && GET_CODE (addr) == SYMBOL_REF
17678 && (SYMBOL_REF_DECL (addr)
17679 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17680 : !SYMBOL_REF_LOCAL_P (addr)))
17682 require_pic_register ();
17683 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17686 if (TARGET_AAPCS_BASED)
17688 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17689 linker. We need to add an IP clobber to allow setting
17690 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17691 is not needed since it's a fixed register. */
17692 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17693 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17697 /* Output a 'call' insn. */
17698 const char *
17699 output_call (rtx *operands)
17701 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17703 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17704 if (REGNO (operands[0]) == LR_REGNUM)
17706 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17707 output_asm_insn ("mov%?\t%0, %|lr", operands);
17710 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17712 if (TARGET_INTERWORK || arm_arch4t)
17713 output_asm_insn ("bx%?\t%0", operands);
17714 else
17715 output_asm_insn ("mov%?\t%|pc, %0", operands);
17717 return "";
17720 /* Output a 'call' insn that is a reference in memory. This is
17721 disabled for ARMv5 and we prefer a blx instead because otherwise
17722 there's a significant performance overhead. */
17723 const char *
17724 output_call_mem (rtx *operands)
17726 gcc_assert (!arm_arch5);
17727 if (TARGET_INTERWORK)
17729 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17730 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17731 output_asm_insn ("bx%?\t%|ip", operands);
17733 else if (regno_use_in (LR_REGNUM, operands[0]))
17735 /* LR is used in the memory address. We load the address in the
17736 first instruction. It's safe to use IP as the target of the
17737 load since the call will kill it anyway. */
17738 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17739 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17740 if (arm_arch4t)
17741 output_asm_insn ("bx%?\t%|ip", operands);
17742 else
17743 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17745 else
17747 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17748 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17751 return "";
17755 /* Output a move from arm registers to arm registers of a long double
17756 OPERANDS[0] is the destination.
17757 OPERANDS[1] is the source. */
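/* A small worked example of the overlap handling below (register numbers
   invented for illustration): moving {r1, r2, r3} into {r2, r3, r4} must
   copy downwards (r4 <- r3, then r3 <- r2, then r2 <- r1); copying upwards
   would overwrite r2 before its old value had been read.  */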
17758 const char *
17759 output_mov_long_double_arm_from_arm (rtx *operands)
17761 /* We have to be careful here because the two might overlap. */
17762 int dest_start = REGNO (operands[0]);
17763 int src_start = REGNO (operands[1]);
17764 rtx ops[2];
17765 int i;
17767 if (dest_start < src_start)
17769 for (i = 0; i < 3; i++)
17771 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17772 ops[1] = gen_rtx_REG (SImode, src_start + i);
17773 output_asm_insn ("mov%?\t%0, %1", ops);
17776 else
17778 for (i = 2; i >= 0; i--)
17780 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17781 ops[1] = gen_rtx_REG (SImode, src_start + i);
17782 output_asm_insn ("mov%?\t%0, %1", ops);
17786 return "";
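/* Emit a pair of SETs moving SRC into DEST.  Illustrative behaviour,
   inferred from the code below rather than from target documentation: for
   a constant such as 0x12345678 the low half (0x5678) is set first and the
   high half (0x1234), when non-zero, is then written through a ZERO_EXTRACT
   of bits 16-31, which on targets with movw/movt typically assembles to a
   movw/movt pair; other sources are split into a HIGH/LO_SUM pair.  */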
17789 void
17790 arm_emit_movpair (rtx dest, rtx src)
17792 /* If the src is an immediate, simplify it. */
17793 if (CONST_INT_P (src))
17795 HOST_WIDE_INT val = INTVAL (src);
17796 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17797 if ((val >> 16) & 0x0000ffff)
17798 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17799 GEN_INT (16)),
17800 GEN_INT ((val >> 16) & 0x0000ffff));
17801 return;
17803 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17804 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17807 /* Output a move between double words. It must be REG<-MEM
17808 or MEM<-REG. */
17809 const char *
17810 output_move_double (rtx *operands, bool emit, int *count)
17812 enum rtx_code code0 = GET_CODE (operands[0]);
17813 enum rtx_code code1 = GET_CODE (operands[1]);
17814 rtx otherops[3];
17815 if (count)
17816 *count = 1;
17818 /* The only case when this might happen is when
17819 you are looking at the length of a DImode instruction
17820 that has an invalid constant in it. */
17821 if (code0 == REG && code1 != MEM)
17823 gcc_assert (!emit);
17824 *count = 2;
17825 return "";
17828 if (code0 == REG)
17830 unsigned int reg0 = REGNO (operands[0]);
17832 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17834 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17836 switch (GET_CODE (XEXP (operands[1], 0)))
17838 case REG:
17840 if (emit)
17842 if (TARGET_LDRD
17843 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17844 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17845 else
17846 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17848 break;
17850 case PRE_INC:
17851 gcc_assert (TARGET_LDRD);
17852 if (emit)
17853 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17854 break;
17856 case PRE_DEC:
17857 if (emit)
17859 if (TARGET_LDRD)
17860 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17861 else
17862 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17864 break;
17866 case POST_INC:
17867 if (emit)
17869 if (TARGET_LDRD)
17870 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17871 else
17872 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17874 break;
17876 case POST_DEC:
17877 gcc_assert (TARGET_LDRD);
17878 if (emit)
17879 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17880 break;
17882 case PRE_MODIFY:
17883 case POST_MODIFY:
17884 /* Autoincrement addressing modes should never have overlapping
17885 base and destination registers, and overlapping index registers
17886 are already prohibited, so this doesn't need to worry about
17887 fix_cm3_ldrd. */
17888 otherops[0] = operands[0];
17889 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17890 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17892 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17894 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17896 /* Registers overlap so split out the increment. */
17897 if (emit)
17899 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17900 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17902 if (count)
17903 *count = 2;
17905 else
17907 /* Use a single insn if we can.
17908 FIXME: IWMMXT allows offsets larger than ldrd can
17909 handle, fix these up with a pair of ldr. */
17910 if (TARGET_THUMB2
17911 || !CONST_INT_P (otherops[2])
17912 || (INTVAL (otherops[2]) > -256
17913 && INTVAL (otherops[2]) < 256))
17915 if (emit)
17916 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17918 else
17920 if (emit)
17922 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17923 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17925 if (count)
17926 *count = 2;
17931 else
17933 /* Use a single insn if we can.
17934 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17935 fix these up with a pair of ldr. */
17936 if (TARGET_THUMB2
17937 || !CONST_INT_P (otherops[2])
17938 || (INTVAL (otherops[2]) > -256
17939 && INTVAL (otherops[2]) < 256))
17941 if (emit)
17942 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17944 else
17946 if (emit)
17948 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17949 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17951 if (count)
17952 *count = 2;
17955 break;
17957 case LABEL_REF:
17958 case CONST:
17959 /* We might be able to use ldrd %0, %1 here. However the range is
17960 different to ldr/adr, and it is broken on some ARMv7-M
17961 implementations. */
17962 /* Use the second register of the pair to avoid problematic
17963 overlap. */
17964 otherops[1] = operands[1];
17965 if (emit)
17966 output_asm_insn ("adr%?\t%0, %1", otherops);
17967 operands[1] = otherops[0];
17968 if (emit)
17970 if (TARGET_LDRD)
17971 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17972 else
17973 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17976 if (count)
17977 *count = 2;
17978 break;
17980 /* ??? This needs checking for thumb2. */
17981 default:
17982 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17983 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17985 otherops[0] = operands[0];
17986 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17987 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17989 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17991 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17993 switch ((int) INTVAL (otherops[2]))
17995 case -8:
17996 if (emit)
17997 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17998 return "";
17999 case -4:
18000 if (TARGET_THUMB2)
18001 break;
18002 if (emit)
18003 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18004 return "";
18005 case 4:
18006 if (TARGET_THUMB2)
18007 break;
18008 if (emit)
18009 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18010 return "";
18013 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18014 operands[1] = otherops[0];
18015 if (TARGET_LDRD
18016 && (REG_P (otherops[2])
18017 || TARGET_THUMB2
18018 || (CONST_INT_P (otherops[2])
18019 && INTVAL (otherops[2]) > -256
18020 && INTVAL (otherops[2]) < 256)))
18022 if (reg_overlap_mentioned_p (operands[0],
18023 otherops[2]))
18025 /* Swap base and index registers over to
18026 avoid a conflict. */
18027 std::swap (otherops[1], otherops[2]);
18029 /* If both registers conflict, it will usually
18030 have been fixed by a splitter. */
18031 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18032 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18034 if (emit)
18036 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18037 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18039 if (count)
18040 *count = 2;
18042 else
18044 otherops[0] = operands[0];
18045 if (emit)
18046 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18048 return "";
18051 if (CONST_INT_P (otherops[2]))
18053 if (emit)
18055 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18056 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18057 else
18058 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18061 else
18063 if (emit)
18064 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18067 else
18069 if (emit)
18070 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18073 if (count)
18074 *count = 2;
18076 if (TARGET_LDRD)
18077 return "ldr%(d%)\t%0, [%1]";
18079 return "ldm%(ia%)\t%1, %M0";
18081 else
18083 otherops[1] = adjust_address (operands[1], SImode, 4);
18084 /* Take care of overlapping base/data reg. */
18085 if (reg_mentioned_p (operands[0], operands[1]))
18087 if (emit)
18089 output_asm_insn ("ldr%?\t%0, %1", otherops);
18090 output_asm_insn ("ldr%?\t%0, %1", operands);
18092 if (count)
18093 *count = 2;
18096 else
18098 if (emit)
18100 output_asm_insn ("ldr%?\t%0, %1", operands);
18101 output_asm_insn ("ldr%?\t%0, %1", otherops);
18103 if (count)
18104 *count = 2;
18109 else
18111 /* Constraints should ensure this. */
18112 gcc_assert (code0 == MEM && code1 == REG);
18113 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18114 || (TARGET_ARM && TARGET_LDRD));
18116 switch (GET_CODE (XEXP (operands[0], 0)))
18118 case REG:
18119 if (emit)
18121 if (TARGET_LDRD)
18122 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18123 else
18124 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18126 break;
18128 case PRE_INC:
18129 gcc_assert (TARGET_LDRD);
18130 if (emit)
18131 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18132 break;
18134 case PRE_DEC:
18135 if (emit)
18137 if (TARGET_LDRD)
18138 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18139 else
18140 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18142 break;
18144 case POST_INC:
18145 if (emit)
18147 if (TARGET_LDRD)
18148 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18149 else
18150 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18152 break;
18154 case POST_DEC:
18155 gcc_assert (TARGET_LDRD);
18156 if (emit)
18157 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18158 break;
18160 case PRE_MODIFY:
18161 case POST_MODIFY:
18162 otherops[0] = operands[1];
18163 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18164 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18166 /* IWMMXT allows offsets larger than strd can handle,
18167 fix these up with a pair of str. */
18168 if (!TARGET_THUMB2
18169 && CONST_INT_P (otherops[2])
18170 && (INTVAL(otherops[2]) <= -256
18171 || INTVAL(otherops[2]) >= 256))
18173 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18175 if (emit)
18177 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18178 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18180 if (count)
18181 *count = 2;
18183 else
18185 if (emit)
18187 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18188 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18190 if (count)
18191 *count = 2;
18194 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18196 if (emit)
18197 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18199 else
18201 if (emit)
18202 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18204 break;
18206 case PLUS:
18207 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18208 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18210 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18212 case -8:
18213 if (emit)
18214 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18215 return "";
18217 case -4:
18218 if (TARGET_THUMB2)
18219 break;
18220 if (emit)
18221 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18222 return "";
18224 case 4:
18225 if (TARGET_THUMB2)
18226 break;
18227 if (emit)
18228 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18229 return "";
18232 if (TARGET_LDRD
18233 && (REG_P (otherops[2])
18234 || TARGET_THUMB2
18235 || (CONST_INT_P (otherops[2])
18236 && INTVAL (otherops[2]) > -256
18237 && INTVAL (otherops[2]) < 256)))
18239 otherops[0] = operands[1];
18240 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18241 if (emit)
18242 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18243 return "";
18245 /* Fall through */
18247 default:
18248 otherops[0] = adjust_address (operands[0], SImode, 4);
18249 otherops[1] = operands[1];
18250 if (emit)
18252 output_asm_insn ("str%?\t%1, %0", operands);
18253 output_asm_insn ("str%?\t%H1, %0", otherops);
18255 if (count)
18256 *count = 2;
18260 return "";
18263 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18264 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18266 const char *
18267 output_move_quad (rtx *operands)
18269 if (REG_P (operands[0]))
18271 /* Load, or reg->reg move. */
18273 if (MEM_P (operands[1]))
18275 switch (GET_CODE (XEXP (operands[1], 0)))
18277 case REG:
18278 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18279 break;
18281 case LABEL_REF:
18282 case CONST:
18283 output_asm_insn ("adr%?\t%0, %1", operands);
18284 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18285 break;
18287 default:
18288 gcc_unreachable ();
18291 else
18293 rtx ops[2];
18294 int dest, src, i;
18296 gcc_assert (REG_P (operands[1]));
18298 dest = REGNO (operands[0]);
18299 src = REGNO (operands[1]);
18301 /* This seems pretty dumb, but hopefully GCC won't try to do it
18302 very often. */
18303 if (dest < src)
18304 for (i = 0; i < 4; i++)
18306 ops[0] = gen_rtx_REG (SImode, dest + i);
18307 ops[1] = gen_rtx_REG (SImode, src + i);
18308 output_asm_insn ("mov%?\t%0, %1", ops);
18310 else
18311 for (i = 3; i >= 0; i--)
18313 ops[0] = gen_rtx_REG (SImode, dest + i);
18314 ops[1] = gen_rtx_REG (SImode, src + i);
18315 output_asm_insn ("mov%?\t%0, %1", ops);
18319 else
18321 gcc_assert (MEM_P (operands[0]));
18322 gcc_assert (REG_P (operands[1]));
18323 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18325 switch (GET_CODE (XEXP (operands[0], 0)))
18327 case REG:
18328 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18329 break;
18331 default:
18332 gcc_unreachable ();
18336 return "";
18339 /* Output a VFP load or store instruction. */
18341 const char *
18342 output_move_vfp (rtx *operands)
18344 rtx reg, mem, addr, ops[2];
18345 int load = REG_P (operands[0]);
18346 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18347 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18348 const char *templ;
18349 char buff[50];
18350 machine_mode mode;
18352 reg = operands[!load];
18353 mem = operands[load];
18355 mode = GET_MODE (reg);
18357 gcc_assert (REG_P (reg));
18358 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18359 gcc_assert (mode == SFmode
18360 || mode == DFmode
18361 || mode == SImode
18362 || mode == DImode
18363 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18364 gcc_assert (MEM_P (mem));
18366 addr = XEXP (mem, 0);
18368 switch (GET_CODE (addr))
18370 case PRE_DEC:
18371 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18372 ops[0] = XEXP (addr, 0);
18373 ops[1] = reg;
18374 break;
18376 case POST_INC:
18377 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18378 ops[0] = XEXP (addr, 0);
18379 ops[1] = reg;
18380 break;
18382 default:
18383 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18384 ops[0] = reg;
18385 ops[1] = mem;
18386 break;
18389 sprintf (buff, templ,
18390 load ? "ld" : "st",
18391 dp ? "64" : "32",
18392 dp ? "P" : "",
18393 integer_p ? "\t%@ int" : "");
18394 output_asm_insn (buff, ops);
18396 return "";
18399 /* Output a Neon double-word or quad-word load or store, or a load
18400 or store for larger structure modes.
18402 WARNING: The ordering of elements is weird in big-endian mode,
18403 because the EABI requires that vectors stored in memory appear
18404 as though they were stored by a VSTM instruction.
18405 GCC RTL defines element ordering based on in-memory order.
18406 This can be different from the architectural ordering of elements
18407 within a NEON register. The intrinsics defined in arm_neon.h use the
18408 NEON register element ordering, not the GCC RTL element ordering.
18410 For example, the in-memory ordering of a big-endian quadword
18411 vector with 16-bit elements when stored from register pair {d0,d1}
18412 will be (lowest address first, d0[N] is NEON register element N):
18414 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18416 When necessary, quadword registers (dN, dN+1) are moved to ARM
18417 registers from rN in the order:
18419 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18421 So that STM/LDM can be used on vectors in ARM registers, and the
18422 same memory layout will result as if VSTM/VLDM were used.
18424 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18425 possible, which allows use of appropriate alignment tags.
18426 Note that the choice of "64" is independent of the actual vector
18427 element size; this size simply ensures that the behavior is
18428 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18430 Due to limitations of those instructions, use of VST1.64/VLD1.64
18431 is not possible if:
18432 - the address contains PRE_DEC, or
18433 - the mode refers to more than 4 double-word registers
18435 In those cases, it would be possible to replace VSTM/VLDM by a
18436 sequence of instructions; this is not currently implemented since
18437 this is not certain to actually improve performance. */
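/* A hedged illustration of the two strategies above (mnemonics taken from
   the templates below; the exact operand printing, including any alignment
   hint such as [r0:64], depends on the memory operand): a quad-word load
   from a plain register address comes out roughly as
       vld1.64  {d0, d1}, [r0]
   whereas modes needing more than four D registers, or PRE_DEC addresses,
   fall back to the vldmia / vldmdb forms.  */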
18439 const char *
18440 output_move_neon (rtx *operands)
18442 rtx reg, mem, addr, ops[2];
18443 int regno, nregs, load = REG_P (operands[0]);
18444 const char *templ;
18445 char buff[50];
18446 machine_mode mode;
18448 reg = operands[!load];
18449 mem = operands[load];
18451 mode = GET_MODE (reg);
18453 gcc_assert (REG_P (reg));
18454 regno = REGNO (reg);
18455 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18456 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18457 || NEON_REGNO_OK_FOR_QUAD (regno));
18458 gcc_assert (VALID_NEON_DREG_MODE (mode)
18459 || VALID_NEON_QREG_MODE (mode)
18460 || VALID_NEON_STRUCT_MODE (mode));
18461 gcc_assert (MEM_P (mem));
18463 addr = XEXP (mem, 0);
18465 /* Strip off const from addresses like (const (plus (...))). */
18466 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18467 addr = XEXP (addr, 0);
18469 switch (GET_CODE (addr))
18471 case POST_INC:
18472 /* We have to use vldm / vstm for too-large modes. */
18473 if (nregs > 4)
18475 templ = "v%smia%%?\t%%0!, %%h1";
18476 ops[0] = XEXP (addr, 0);
18478 else
18480 templ = "v%s1.64\t%%h1, %%A0";
18481 ops[0] = mem;
18483 ops[1] = reg;
18484 break;
18486 case PRE_DEC:
18487 /* We have to use vldm / vstm in this case, since there is no
18488 pre-decrement form of the vld1 / vst1 instructions. */
18489 templ = "v%smdb%%?\t%%0!, %%h1";
18490 ops[0] = XEXP (addr, 0);
18491 ops[1] = reg;
18492 break;
18494 case POST_MODIFY:
18495 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18496 gcc_unreachable ();
18498 case REG:
18499 /* We have to use vldm / vstm for too-large modes. */
18500 if (nregs > 1)
18502 if (nregs > 4)
18503 templ = "v%smia%%?\t%%m0, %%h1";
18504 else
18505 templ = "v%s1.64\t%%h1, %%A0";
18507 ops[0] = mem;
18508 ops[1] = reg;
18509 break;
18511 /* Fall through. */
18512 case LABEL_REF:
18513 case PLUS:
18515 int i;
18516 int overlap = -1;
18517 for (i = 0; i < nregs; i++)
18519 /* We're only using DImode here because it's a convenient size. */
18520 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18521 ops[1] = adjust_address (mem, DImode, 8 * i);
18522 if (reg_overlap_mentioned_p (ops[0], mem))
18524 gcc_assert (overlap == -1);
18525 overlap = i;
18527 else
18529 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18530 output_asm_insn (buff, ops);
18533 if (overlap != -1)
18535 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18536 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18537 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18538 output_asm_insn (buff, ops);
18541 return "";
18544 default:
18545 gcc_unreachable ();
18548 sprintf (buff, templ, load ? "ld" : "st");
18549 output_asm_insn (buff, ops);
18551 return "";
18554 /* Compute and return the length of neon_mov<mode>, where <mode> is
18555 one of VSTRUCT modes: EI, OI, CI or XI. */
18557 arm_attr_length_move_neon (rtx_insn *insn)
18559 rtx reg, mem, addr;
18560 int load;
18561 machine_mode mode;
18563 extract_insn_cached (insn);
18565 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18567 mode = GET_MODE (recog_data.operand[0]);
18568 switch (mode)
18570 case EImode:
18571 case OImode:
18572 return 8;
18573 case CImode:
18574 return 12;
18575 case XImode:
18576 return 16;
18577 default:
18578 gcc_unreachable ();
18582 load = REG_P (recog_data.operand[0]);
18583 reg = recog_data.operand[!load];
18584 mem = recog_data.operand[load];
18586 gcc_assert (MEM_P (mem));
18588 mode = GET_MODE (reg);
18589 addr = XEXP (mem, 0);
18591 /* Strip off const from addresses like (const (plus (...))). */
18592 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18593 addr = XEXP (addr, 0);
18595 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18597 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18598 return insns * 4;
18600 else
18601 return 4;
18604 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18605 return zero. */
18608 arm_address_offset_is_imm (rtx_insn *insn)
18610 rtx mem, addr;
18612 extract_insn_cached (insn);
18614 if (REG_P (recog_data.operand[0]))
18615 return 0;
18617 mem = recog_data.operand[0];
18619 gcc_assert (MEM_P (mem));
18621 addr = XEXP (mem, 0);
18623 if (REG_P (addr)
18624 || (GET_CODE (addr) == PLUS
18625 && REG_P (XEXP (addr, 0))
18626 && CONST_INT_P (XEXP (addr, 1))))
18627 return 1;
18628 else
18629 return 0;
18632 /* Output an ADD r, s, #n where n may be too big for one instruction.
18633 If adding zero to one register, output nothing. */
18634 const char *
18635 output_add_immediate (rtx *operands)
18637 HOST_WIDE_INT n = INTVAL (operands[2]);
18639 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18641 if (n < 0)
18642 output_multi_immediate (operands,
18643 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18644 -n);
18645 else
18646 output_multi_immediate (operands,
18647 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18651 return "";
18654 /* Output a multiple immediate operation.
18655 OPERANDS is the vector of operands referred to in the output patterns.
18656 INSTR1 is the output pattern to use for the first constant.
18657 INSTR2 is the output pattern to use for subsequent constants.
18658 IMMED_OP is the index of the constant slot in OPERANDS.
18659 N is the constant value. */
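/* Hand-worked example (operands r0/r1 are made up): for N == 0x10004 with
   the "add" patterns used by output_add_immediate, the loop below splits
   the constant into the chunks 0x4 and 0x10000 and emits
       add     r0, r1, #4
       add     r0, r0, #65536  */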
18660 static const char *
18661 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18662 int immed_op, HOST_WIDE_INT n)
18664 #if HOST_BITS_PER_WIDE_INT > 32
18665 n &= 0xffffffff;
18666 #endif
18668 if (n == 0)
18670 /* Quick and easy output. */
18671 operands[immed_op] = const0_rtx;
18672 output_asm_insn (instr1, operands);
18674 else
18676 int i;
18677 const char * instr = instr1;
18679 /* Note that n is never zero here (which would give no output). */
18680 for (i = 0; i < 32; i += 2)
18682 if (n & (3 << i))
18684 operands[immed_op] = GEN_INT (n & (255 << i));
18685 output_asm_insn (instr, operands);
18686 instr = instr2;
18687 i += 6;
18692 return "";
18695 /* Return the name of a shifter operation. */
18696 static const char *
18697 arm_shift_nmem(enum rtx_code code)
18699 switch (code)
18701 case ASHIFT:
18702 return ARM_LSL_NAME;
18704 case ASHIFTRT:
18705 return "asr";
18707 case LSHIFTRT:
18708 return "lsr";
18710 case ROTATERT:
18711 return "ror";
18713 default:
18714 abort();
18718 /* Return the appropriate ARM instruction for the operation code.
18719 The returned result should not be overwritten. OP is the rtx of the
18720 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18721 was shifted. */
18722 const char *
18723 arithmetic_instr (rtx op, int shift_first_arg)
18725 switch (GET_CODE (op))
18727 case PLUS:
18728 return "add";
18730 case MINUS:
18731 return shift_first_arg ? "rsb" : "sub";
18733 case IOR:
18734 return "orr";
18736 case XOR:
18737 return "eor";
18739 case AND:
18740 return "and";
18742 case ASHIFT:
18743 case ASHIFTRT:
18744 case LSHIFTRT:
18745 case ROTATERT:
18746 return arm_shift_nmem(GET_CODE(op));
18748 default:
18749 gcc_unreachable ();
18753 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18754 for the operation code. The returned result should not be overwritten.
18755 OP is the rtx code of the shift.
18756 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18757 constant shift amount otherwise. */
18758 static const char *
18759 shift_op (rtx op, HOST_WIDE_INT *amountp)
18761 const char * mnem;
18762 enum rtx_code code = GET_CODE (op);
18764 switch (code)
18766 case ROTATE:
18767 if (!CONST_INT_P (XEXP (op, 1)))
18769 output_operand_lossage ("invalid shift operand");
18770 return NULL;
18773 code = ROTATERT;
18774 *amountp = 32 - INTVAL (XEXP (op, 1));
18775 mnem = "ror";
18776 break;
18778 case ASHIFT:
18779 case ASHIFTRT:
18780 case LSHIFTRT:
18781 case ROTATERT:
18782 mnem = arm_shift_nmem(code);
18783 if (CONST_INT_P (XEXP (op, 1)))
18785 *amountp = INTVAL (XEXP (op, 1));
18787 else if (REG_P (XEXP (op, 1)))
18789 *amountp = -1;
18790 return mnem;
18792 else
18794 output_operand_lossage ("invalid shift operand");
18795 return NULL;
18797 break;
18799 case MULT:
18800 /* We never have to worry about the amount being other than a
18801 power of 2, since this case can never be reloaded from a reg. */
18802 if (!CONST_INT_P (XEXP (op, 1)))
18804 output_operand_lossage ("invalid shift operand");
18805 return NULL;
18808 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18810 /* Amount must be a power of two. */
18811 if (*amountp & (*amountp - 1))
18813 output_operand_lossage ("invalid shift operand");
18814 return NULL;
18817 *amountp = int_log2 (*amountp);
18818 return ARM_LSL_NAME;
18820 default:
18821 output_operand_lossage ("invalid shift operand");
18822 return NULL;
18825 /* This is not 100% correct, but follows from the desire to merge
18826 multiplication by a power of 2 with the recognizer for a
18827 shift. >=32 is not a valid shift for "lsl", so we must try to
18828 output a shift that produces the correct arithmetical result.
18829 Using lsr #32 is identical except for the fact that the carry bit
18830 is not set correctly if we set the flags; but we never use the
18831 carry bit from such an operation, so we can ignore that. */
18832 if (code == ROTATERT)
18833 /* Rotate is just modulo 32. */
18834 *amountp &= 31;
18835 else if (*amountp != (*amountp & 31))
18837 if (code == ASHIFT)
18838 mnem = "lsr";
18839 *amountp = 32;
18842 /* Shifts of 0 are no-ops. */
18843 if (*amountp == 0)
18844 return NULL;
18846 return mnem;
18849 /* Obtain the shift from the POWER of two. */
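/* For example, int_log2 (8) == 3.  The argument is assumed to be a
   non-zero power of two, as guarded by the assertion in the loop.  */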
18851 static HOST_WIDE_INT
18852 int_log2 (HOST_WIDE_INT power)
18854 HOST_WIDE_INT shift = 0;
18856 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18858 gcc_assert (shift <= 31);
18859 shift++;
18862 return shift;
18865 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18866 because /bin/as is horribly restrictive. The judgement about
18867 whether or not each character is 'printable' (and can be output as
18868 is) or not (and must be printed with an octal escape) must be made
18869 with reference to the *host* character set -- the situation is
18870 similar to that discussed in the comments above pp_c_char in
18871 c-pretty-print.c. */
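/* Illustrative output (exact escapes depend on the host character set, as
   noted above): the four bytes 'a', '"', '\n', '\0' would be emitted as
	.ascii	"a\"\012\000"
   with a fresh ".ascii" directive started whenever MAX_ASCII_LEN output
   characters have accumulated on the current one.  */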
18873 #define MAX_ASCII_LEN 51
18875 void
18876 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18878 int i;
18879 int len_so_far = 0;
18881 fputs ("\t.ascii\t\"", stream);
18883 for (i = 0; i < len; i++)
18885 int c = p[i];
18887 if (len_so_far >= MAX_ASCII_LEN)
18889 fputs ("\"\n\t.ascii\t\"", stream);
18890 len_so_far = 0;
18893 if (ISPRINT (c))
18895 if (c == '\\' || c == '\"')
18897 putc ('\\', stream);
18898 len_so_far++;
18900 putc (c, stream);
18901 len_so_far++;
18903 else
18905 fprintf (stream, "\\%03o", c);
18906 len_so_far += 4;
18910 fputs ("\"\n", stream);
18913 /* Whether a register is callee saved or not. This is necessary because,
18914 when optimizing for size on Thumb-1 targets, high registers are marked as
18915 caller saved despite being callee saved, in order to avoid using them. */
18916 #define callee_saved_reg_p(reg) \
18917 (!call_used_regs[reg] \
18918 || (TARGET_THUMB1 && optimize_size \
18919 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18921 /* Compute the register save mask for registers 0 through 12
18922 inclusive. This code is used by arm_compute_save_reg_mask. */
18924 static unsigned long
18925 arm_compute_save_reg0_reg12_mask (void)
18927 unsigned long func_type = arm_current_func_type ();
18928 unsigned long save_reg_mask = 0;
18929 unsigned int reg;
18931 if (IS_INTERRUPT (func_type))
18933 unsigned int max_reg;
18934 /* Interrupt functions must not corrupt any registers,
18935 even call clobbered ones. If this is a leaf function
18936 we can just examine the registers used by the RTL, but
18937 otherwise we have to assume that whatever function is
18938 called might clobber anything, and so we have to save
18939 all the call-clobbered registers as well. */
18940 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18941 /* FIQ handlers have registers r8 - r12 banked, so
18942 we only need to check r0 - r7. Normal ISRs only
18943 bank r14 and r15, so we must check up to r12.
18944 r13 is the stack pointer which is always preserved,
18945 so we do not need to consider it here. */
18946 max_reg = 7;
18947 else
18948 max_reg = 12;
18950 for (reg = 0; reg <= max_reg; reg++)
18951 if (df_regs_ever_live_p (reg)
18952 || (! crtl->is_leaf && call_used_regs[reg]))
18953 save_reg_mask |= (1 << reg);
18955 /* Also save the pic base register if necessary. */
18956 if (flag_pic
18957 && !TARGET_SINGLE_PIC_BASE
18958 && arm_pic_register != INVALID_REGNUM
18959 && crtl->uses_pic_offset_table)
18960 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18962 else if (IS_VOLATILE(func_type))
18964 /* For noreturn functions we historically omitted register saves
18965 altogether. However this really messes up debugging. As a
18966 compromise save just the frame pointers. Combined with the link
18967 register saved elsewhere this should be sufficient to get
18968 a backtrace. */
18969 if (frame_pointer_needed)
18970 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18971 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18972 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18973 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18974 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18976 else
18978 /* In the normal case we only need to save those registers
18979 which are call saved and which are used by this function. */
18980 for (reg = 0; reg <= 11; reg++)
18981 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
18982 save_reg_mask |= (1 << reg);
18984 /* Handle the frame pointer as a special case. */
18985 if (frame_pointer_needed)
18986 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18988 /* If we aren't loading the PIC register,
18989 don't stack it even though it may be live. */
18990 if (flag_pic
18991 && !TARGET_SINGLE_PIC_BASE
18992 && arm_pic_register != INVALID_REGNUM
18993 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18994 || crtl->uses_pic_offset_table))
18995 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18997 /* The prologue will copy SP into R0, so save it. */
18998 if (IS_STACKALIGN (func_type))
18999 save_reg_mask |= 1;
19002 /* Save registers so the exception handler can modify them. */
19003 if (crtl->calls_eh_return)
19005 unsigned int i;
19007 for (i = 0; ; i++)
19009 reg = EH_RETURN_DATA_REGNO (i);
19010 if (reg == INVALID_REGNUM)
19011 break;
19012 save_reg_mask |= 1 << reg;
19016 return save_reg_mask;
19019 /* Return true if r3 is live at the start of the function. */
19021 static bool
19022 arm_r3_live_at_start_p (void)
19024 /* Just look at cfg info, which is still close enough to correct at this
19025 point. This gives false positives for broken functions that might use
19026 uninitialized data that happens to be allocated in r3, but who cares? */
19027 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19030 /* Compute the number of bytes used to store the static chain register on the
19031 stack, above the stack frame. We need to know this accurately to get the
19032 alignment of the rest of the stack frame correct. */
19034 static int
19035 arm_compute_static_chain_stack_bytes (void)
19037 /* See the defining assertion in arm_expand_prologue. */
19038 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19039 && IS_NESTED (arm_current_func_type ())
19040 && arm_r3_live_at_start_p ()
19041 && crtl->args.pretend_args_size == 0)
19042 return 4;
19044 return 0;
19047 /* Compute a bit mask of which registers need to be
19048 saved on the stack for the current function.
19049 This is used by arm_get_frame_offsets, which may add extra registers. */
19051 static unsigned long
19052 arm_compute_save_reg_mask (void)
19054 unsigned int save_reg_mask = 0;
19055 unsigned long func_type = arm_current_func_type ();
19056 unsigned int reg;
19058 if (IS_NAKED (func_type))
19059 /* This should never really happen. */
19060 return 0;
19062 /* If we are creating a stack frame, then we must save the frame pointer,
19063 IP (which will hold the old stack pointer), LR and the PC. */
19064 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19065 save_reg_mask |=
19066 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19067 | (1 << IP_REGNUM)
19068 | (1 << LR_REGNUM)
19069 | (1 << PC_REGNUM);
19071 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19073 /* Decide if we need to save the link register.
19074 Interrupt routines have their own banked link register,
19075 so they never need to save it.
19076 Otherwise if we do not use the link register we do not need to save
19077 it. If we are pushing other registers onto the stack however, we
19078 can save an instruction in the epilogue by pushing the link register
19079 now and then popping it back into the PC. This incurs extra memory
19080 accesses though, so we only do it when optimizing for size, and only
19081 if we know that we will not need a fancy return sequence. */
19082 if (df_regs_ever_live_p (LR_REGNUM)
19083 || (save_reg_mask
19084 && optimize_size
19085 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19086 && !crtl->tail_call_emit
19087 && !crtl->calls_eh_return))
19088 save_reg_mask |= 1 << LR_REGNUM;
19090 if (cfun->machine->lr_save_eliminated)
19091 save_reg_mask &= ~ (1 << LR_REGNUM);
19093 if (TARGET_REALLY_IWMMXT
19094 && ((bit_count (save_reg_mask)
19095 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19096 arm_compute_static_chain_stack_bytes())
19097 ) % 2) != 0)
19099 /* The total number of registers that are going to be pushed
19100 onto the stack is odd. We need to ensure that the stack
19101 is 64-bit aligned before we start to save iWMMXt registers,
19102 and also before we start to create locals. (A local variable
19103 might be a double or long long which we will load/store using
19104 an iWMMXt instruction). Therefore we need to push another
19105 ARM register, so that the stack will be 64-bit aligned. We
19106 try to avoid using the arg registers (r0 - r3) as they might be
19107 used to pass values in a tail call. */
19108 for (reg = 4; reg <= 12; reg++)
19109 if ((save_reg_mask & (1 << reg)) == 0)
19110 break;
19112 if (reg <= 12)
19113 save_reg_mask |= (1 << reg);
19114 else
19116 cfun->machine->sibcall_blocked = 1;
19117 save_reg_mask |= (1 << 3);
19121 /* We may need to push an additional register for use initializing the
19122 PIC base register. */
19123 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19124 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19126 reg = thumb_find_work_register (1 << 4);
19127 if (!call_used_regs[reg])
19128 save_reg_mask |= (1 << reg);
19131 return save_reg_mask;
19135 /* Compute a bit mask of which registers need to be
19136 saved on the stack for the current function. */
19137 static unsigned long
19138 thumb1_compute_save_reg_mask (void)
19140 unsigned long mask;
19141 unsigned reg;
19143 mask = 0;
19144 for (reg = 0; reg < 12; reg ++)
19145 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19146 mask |= 1 << reg;
19148 if (flag_pic
19149 && !TARGET_SINGLE_PIC_BASE
19150 && arm_pic_register != INVALID_REGNUM
19151 && crtl->uses_pic_offset_table)
19152 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19154 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19155 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19156 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19158 /* LR will also be pushed if any lo regs are pushed. */
19159 if (mask & 0xff || thumb_force_lr_save ())
19160 mask |= (1 << LR_REGNUM);
19162 /* Make sure we have a low work register if we need one.
19163 We will need one if we are going to push a high register,
19164 but we are not currently intending to push a low register. */
19165 if ((mask & 0xff) == 0
19166 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19168 /* Use thumb_find_work_register to choose which register
19169 we will use. If the register is live then we will
19170 have to push it. Use LAST_LO_REGNUM as our fallback
19171 choice for the register to select. */
19172 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19173 /* Make sure the register returned by thumb_find_work_register is
19174 not part of the return value. */
19175 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19176 reg = LAST_LO_REGNUM;
19178 if (callee_saved_reg_p (reg))
19179 mask |= 1 << reg;
19182 /* The 504 below is 8 bytes less than 512 because there are two possible
19183 alignment words. We can't tell here if they will be present or not so we
19184 have to play it safe and assume that they are. */
19185 if ((CALLER_INTERWORKING_SLOT_SIZE +
19186 ROUND_UP_WORD (get_frame_size ()) +
19187 crtl->outgoing_args_size) >= 504)
19189 /* This is the same as the code in thumb1_expand_prologue() which
19190 determines which register to use for stack decrement. */
19191 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19192 if (mask & (1 << reg))
19193 break;
19195 if (reg > LAST_LO_REGNUM)
19197 /* Make sure we have a register available for stack decrement. */
19198 mask |= 1 << LAST_LO_REGNUM;
19202 return mask;
19206 /* Return the number of bytes required to save VFP registers. */
19207 static int
19208 arm_get_vfp_saved_size (void)
19210 unsigned int regno;
19211 int count;
19212 int saved;
19214 saved = 0;
19215 /* Space for saved VFP registers. */
19216 if (TARGET_HARD_FLOAT && TARGET_VFP)
19218 count = 0;
19219 for (regno = FIRST_VFP_REGNUM;
19220 regno < LAST_VFP_REGNUM;
19221 regno += 2)
19223 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19224 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19226 if (count > 0)
19228 /* Workaround ARM10 VFPr1 bug. */
19229 if (count == 2 && !arm_arch6)
19230 count++;
19231 saved += count * 8;
19233 count = 0;
19235 else
19236 count++;
19238 if (count > 0)
19240 if (count == 2 && !arm_arch6)
19241 count++;
19242 saved += count * 8;
19245 return saved;
19249 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19250 everything bar the final return instruction. If simple_return is true,
19251 then do not output the epilogue, because it has already been emitted in RTL. */
19252 const char *
19253 output_return_instruction (rtx operand, bool really_return, bool reverse,
19254 bool simple_return)
19256 char conditional[10];
19257 char instr[100];
19258 unsigned reg;
19259 unsigned long live_regs_mask;
19260 unsigned long func_type;
19261 arm_stack_offsets *offsets;
19263 func_type = arm_current_func_type ();
19265 if (IS_NAKED (func_type))
19266 return "";
19268 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19270 /* If this function was declared non-returning, and we have
19271 found a tail call, then we have to trust that the called
19272 function won't return. */
19273 if (really_return)
19275 rtx ops[2];
19277 /* Otherwise, trap an attempted return by aborting. */
19278 ops[0] = operand;
19279 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19280 : "abort");
19281 assemble_external_libcall (ops[1]);
19282 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19285 return "";
19288 gcc_assert (!cfun->calls_alloca || really_return);
19290 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19292 cfun->machine->return_used_this_function = 1;
19294 offsets = arm_get_frame_offsets ();
19295 live_regs_mask = offsets->saved_regs_mask;
19297 if (!simple_return && live_regs_mask)
19299 const char * return_reg;
19301 /* If we do not have any special requirements for function exit
19302 (e.g. interworking) then we can load the return address
19303 directly into the PC. Otherwise we must load it into LR. */
19304 if (really_return
19305 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19306 return_reg = reg_names[PC_REGNUM];
19307 else
19308 return_reg = reg_names[LR_REGNUM];
19310 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19312 /* There are three possible reasons for the IP register
19313 being saved. 1) a stack frame was created, in which case
19314 IP contains the old stack pointer, or 2) an ISR routine
19315 corrupted it, or 3) it was saved to align the stack on
19316 iWMMXt. In case 1, restore IP into SP, otherwise just
19317 restore IP. */
19318 if (frame_pointer_needed)
19320 live_regs_mask &= ~ (1 << IP_REGNUM);
19321 live_regs_mask |= (1 << SP_REGNUM);
19323 else
19324 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19327 /* On some ARM architectures it is faster to use LDR rather than
19328 LDM to load a single register. On other architectures, the
19329 cost is the same. In 26 bit mode, or for exception handlers,
19330 we have to use LDM to load the PC so that the CPSR is also
19331 restored. */
19332 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19333 if (live_regs_mask == (1U << reg))
19334 break;
19336 if (reg <= LAST_ARM_REGNUM
19337 && (reg != LR_REGNUM
19338 || ! really_return
19339 || ! IS_INTERRUPT (func_type)))
19341 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19342 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19344 else
19346 char *p;
19347 int first = 1;
19349 /* Generate the load multiple instruction to restore the
19350 registers. Note we can get here, even if
19351 frame_pointer_needed is true, but only if sp already
19352 points to the base of the saved core registers. */
19353 if (live_regs_mask & (1 << SP_REGNUM))
19355 unsigned HOST_WIDE_INT stack_adjust;
19357 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19358 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19360 if (stack_adjust && arm_arch5 && TARGET_ARM)
19361 if (TARGET_UNIFIED_ASM)
19362 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19363 else
19364 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19365 else
19367 /* If we can't use ldmib (SA110 bug),
19368 then try to pop r3 instead. */
19369 if (stack_adjust)
19370 live_regs_mask |= 1 << 3;
19372 if (TARGET_UNIFIED_ASM)
19373 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19374 else
19375 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19378 else
19379 if (TARGET_UNIFIED_ASM)
19380 sprintf (instr, "pop%s\t{", conditional);
19381 else
19382 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19384 p = instr + strlen (instr);
19386 for (reg = 0; reg <= SP_REGNUM; reg++)
19387 if (live_regs_mask & (1 << reg))
19389 int l = strlen (reg_names[reg]);
19391 if (first)
19392 first = 0;
19393 else
19395 memcpy (p, ", ", 2);
19396 p += 2;
19399 memcpy (p, "%|", 2);
19400 memcpy (p + 2, reg_names[reg], l);
19401 p += l + 2;
19404 if (live_regs_mask & (1 << LR_REGNUM))
19406 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19407 /* If returning from an interrupt, restore the CPSR. */
19408 if (IS_INTERRUPT (func_type))
19409 strcat (p, "^");
19411 else
19412 strcpy (p, "}");
19415 output_asm_insn (instr, & operand);
19417 /* See if we need to generate an extra instruction to
19418 perform the actual function return. */
19419 if (really_return
19420 && func_type != ARM_FT_INTERWORKED
19421 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19423 /* The return has already been handled
19424 by loading the LR into the PC. */
19425 return "";
19429 if (really_return)
19431 switch ((int) ARM_FUNC_TYPE (func_type))
19433 case ARM_FT_ISR:
19434 case ARM_FT_FIQ:
19435 /* ??? This is wrong for unified assembly syntax. */
19436 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19437 break;
19439 case ARM_FT_INTERWORKED:
19440 sprintf (instr, "bx%s\t%%|lr", conditional);
19441 break;
19443 case ARM_FT_EXCEPTION:
19444 /* ??? This is wrong for unified assembly syntax. */
19445 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19446 break;
19448 default:
19449 /* Use bx if it's available. */
19450 if (arm_arch5 || arm_arch4t)
19451 sprintf (instr, "bx%s\t%%|lr", conditional);
19452 else
19453 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19454 break;
19457 output_asm_insn (instr, & operand);
19460 return "";
19463 /* Write the function name into the code section, directly preceding
19464 the function prologue.
19466 Code will be output similar to this:
19468 .ascii "arm_poke_function_name", 0
19469 .align
19471 .word 0xff000000 + (t1 - t0)
19472 arm_poke_function_name
19473 mov ip, sp
19474 stmfd sp!, {fp, ip, lr, pc}
19475 sub fp, ip, #4
19477 When performing a stack backtrace, code can inspect the value
19478 of 'pc' stored at 'fp' + 0. If the trace function then looks
19479 at location pc - 12 and the top 8 bits are set, then we know
19480 that there is a function name embedded immediately preceding this
19481 location and that its length is ((pc[-3]) & ~0xff000000).
19483 We assume that pc is declared as a pointer to an unsigned long.
19485 It is of no benefit to output the function name if we are assembling
19486 a leaf function. These function types will not contain a stack
19487 backtrace structure, therefore it is not possible to determine the
19488 function name. */
19489 void
19490 arm_poke_function_name (FILE *stream, const char *name)
19492 unsigned long alignlength;
19493 unsigned long length;
19494 rtx x;
19496 length = strlen (name) + 1;
19497 alignlength = ROUND_UP_WORD (length);
19499 ASM_OUTPUT_ASCII (stream, name, length);
19500 ASM_OUTPUT_ALIGN (stream, 2);
19501 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19502 assemble_aligned_integer (UNITS_PER_WORD, x);
19505 /* Place some comments into the assembler stream
19506 describing the current function. */
19507 static void
19508 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19510 unsigned long func_type;
19512 /* ??? Do we want to print some of the below anyway? */
19513 if (TARGET_THUMB1)
19514 return;
19516 /* Sanity check. */
19517 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19519 func_type = arm_current_func_type ();
19521 switch ((int) ARM_FUNC_TYPE (func_type))
19523 default:
19524 case ARM_FT_NORMAL:
19525 break;
19526 case ARM_FT_INTERWORKED:
19527 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19528 break;
19529 case ARM_FT_ISR:
19530 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19531 break;
19532 case ARM_FT_FIQ:
19533 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19534 break;
19535 case ARM_FT_EXCEPTION:
19536 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19537 break;
19540 if (IS_NAKED (func_type))
19541 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19543 if (IS_VOLATILE (func_type))
19544 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19546 if (IS_NESTED (func_type))
19547 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19548 if (IS_STACKALIGN (func_type))
19549 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19551 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19552 crtl->args.size,
19553 crtl->args.pretend_args_size, frame_size);
19555 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19556 frame_pointer_needed,
19557 cfun->machine->uses_anonymous_args);
19559 if (cfun->machine->lr_save_eliminated)
19560 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19562 if (crtl->calls_eh_return)
19563 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19567 static void
19568 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19569 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19571 arm_stack_offsets *offsets;
19573 if (TARGET_THUMB1)
19575 int regno;
19577 /* Emit any call-via-reg trampolines that are needed for v4t support
19578 of call_reg and call_value_reg type insns. */
19579 for (regno = 0; regno < LR_REGNUM; regno++)
19581 rtx label = cfun->machine->call_via[regno];
19583 if (label != NULL)
19585 switch_to_section (function_section (current_function_decl));
19586 targetm.asm_out.internal_label (asm_out_file, "L",
19587 CODE_LABEL_NUMBER (label));
19588 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19592 /* ??? Probably not safe to set this here, since it assumes that a
19593 function will be emitted as assembly immediately after we generate
19594 RTL for it. This does not happen for inline functions. */
19595 cfun->machine->return_used_this_function = 0;
19597 else /* TARGET_32BIT */
19599 /* We need to take into account any stack-frame rounding. */
19600 offsets = arm_get_frame_offsets ();
19602 gcc_assert (!use_return_insn (FALSE, NULL)
19603 || (cfun->machine->return_used_this_function != 0)
19604 || offsets->saved_regs == offsets->outgoing_args
19605 || frame_pointer_needed);
19609 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19610 STR and STRD. If an even number of registers are being pushed, one
19611 or more STRD patterns are created for each register pair. If an
19612 odd number of registers are pushed, emit an initial STR followed by
19613 as many STRD instructions as are needed. This works best when the
19614 stack is initially 64-bit aligned (the normal case), since it
19615 ensures that each STRD is also 64-bit aligned. */
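/* For illustration only (the exact registers and offsets depend on the
   live register mask), pushing {r4, r5, r6} would be emitted roughly as:

       str   r4, [sp, #-12]!   @ odd count: single STR with writeback
       strd  r5, r6, [sp, #4]  @ remaining pair, 64-bit aligned

   with the attached DWARF note describing one 12-byte SP decrement plus
   the three individual word stores.  */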
19616 static void
19617 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19619 int num_regs = 0;
19620 int i;
19621 int regno;
19622 rtx par = NULL_RTX;
19623 rtx dwarf = NULL_RTX;
19624 rtx tmp;
19625 bool first = true;
19627 num_regs = bit_count (saved_regs_mask);
19629 /* Must be at least one register to save, and can't save SP or PC. */
19630 gcc_assert (num_regs > 0 && num_regs <= 14);
19631 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19632 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19634 /* Create sequence for DWARF info. All the frame-related data for
19635 debugging is held in this wrapper. */
19636 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19638 /* Describe the stack adjustment. */
19639 tmp = gen_rtx_SET (VOIDmode,
19640 stack_pointer_rtx,
19641 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19642 RTX_FRAME_RELATED_P (tmp) = 1;
19643 XVECEXP (dwarf, 0, 0) = tmp;
19645 /* Find the first register. */
19646 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19649 i = 0;
19651 /* If there's an odd number of registers to push, start off by
19652 pushing a single register. This ensures that subsequent strd
19653 operations are dword aligned (assuming that SP was originally
19654 64-bit aligned). */
19655 if ((num_regs & 1) != 0)
19657 rtx reg, mem, insn;
19659 reg = gen_rtx_REG (SImode, regno);
19660 if (num_regs == 1)
19661 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19662 stack_pointer_rtx));
19663 else
19664 mem = gen_frame_mem (Pmode,
19665 gen_rtx_PRE_MODIFY
19666 (Pmode, stack_pointer_rtx,
19667 plus_constant (Pmode, stack_pointer_rtx,
19668 -4 * num_regs)));
19670 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19671 RTX_FRAME_RELATED_P (tmp) = 1;
19672 insn = emit_insn (tmp);
19673 RTX_FRAME_RELATED_P (insn) = 1;
19674 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19675 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19676 reg);
19677 RTX_FRAME_RELATED_P (tmp) = 1;
19678 i++;
19679 regno++;
19680 XVECEXP (dwarf, 0, i) = tmp;
19681 first = false;
19684 while (i < num_regs)
19685 if (saved_regs_mask & (1 << regno))
19687 rtx reg1, reg2, mem1, mem2;
19688 rtx tmp0, tmp1, tmp2;
19689 int regno2;
19691 /* Find the register to pair with this one. */
19692 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19693 regno2++)
19696 reg1 = gen_rtx_REG (SImode, regno);
19697 reg2 = gen_rtx_REG (SImode, regno2);
19699 if (first)
19701 rtx insn;
19703 first = false;
19704 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19705 stack_pointer_rtx,
19706 -4 * num_regs));
19707 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19708 stack_pointer_rtx,
19709 -4 * (num_regs - 1)));
19710 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19711 plus_constant (Pmode, stack_pointer_rtx,
19712 -4 * (num_regs)));
19713 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19714 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19715 RTX_FRAME_RELATED_P (tmp0) = 1;
19716 RTX_FRAME_RELATED_P (tmp1) = 1;
19717 RTX_FRAME_RELATED_P (tmp2) = 1;
19718 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19719 XVECEXP (par, 0, 0) = tmp0;
19720 XVECEXP (par, 0, 1) = tmp1;
19721 XVECEXP (par, 0, 2) = tmp2;
19722 insn = emit_insn (par);
19723 RTX_FRAME_RELATED_P (insn) = 1;
19724 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19726 else
19728 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19729 stack_pointer_rtx,
19730 4 * i));
19731 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19732 stack_pointer_rtx,
19733 4 * (i + 1)));
19734 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19735 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19736 RTX_FRAME_RELATED_P (tmp1) = 1;
19737 RTX_FRAME_RELATED_P (tmp2) = 1;
19738 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19739 XVECEXP (par, 0, 0) = tmp1;
19740 XVECEXP (par, 0, 1) = tmp2;
19741 emit_insn (par);
19744 /* Create unwind information. This is an approximation. */
19745 tmp1 = gen_rtx_SET (VOIDmode,
19746 gen_frame_mem (Pmode,
19747 plus_constant (Pmode,
19748 stack_pointer_rtx,
19749 4 * i)),
19750 reg1);
19751 tmp2 = gen_rtx_SET (VOIDmode,
19752 gen_frame_mem (Pmode,
19753 plus_constant (Pmode,
19754 stack_pointer_rtx,
19755 4 * (i + 1))),
19756 reg2);
19758 RTX_FRAME_RELATED_P (tmp1) = 1;
19759 RTX_FRAME_RELATED_P (tmp2) = 1;
19760 XVECEXP (dwarf, 0, i + 1) = tmp1;
19761 XVECEXP (dwarf, 0, i + 2) = tmp2;
19762 i += 2;
19763 regno = regno2 + 1;
19765 else
19766 regno++;
19768 return;
19771 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19772 whenever possible, otherwise it emits single-word stores. The first store
19773 also allocates stack space for all saved registers, using writeback with
19774 post-addressing mode. All other stores use offset addressing. If no STRD
19775 can be emitted, this function emits a sequence of single-word stores,
19776 and not an STM as before, because single-word stores provide more freedom
19777 for scheduling and can be turned into an STM by peephole optimizations. */
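/* For illustration only (assuming r4, r5 and r7 are to be saved), the
   emitted sequence would look roughly like:

       strd  r4, r5, [sp, #-12]!   @ first store allocates all 12 bytes
       str   r7, [sp, #8]          @ remaining register, offset addressing

   The DWARF note attached to the first store still describes a single
   SP adjustment and one word store per register.  */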
19778 static void
19779 arm_emit_strd_push (unsigned long saved_regs_mask)
19781 int num_regs = 0;
19782 int i, j, dwarf_index = 0;
19783 int offset = 0;
19784 rtx dwarf = NULL_RTX;
19785 rtx insn = NULL_RTX;
19786 rtx tmp, mem;
19788 /* TODO: More efficient code could be emitted by changing the
19789 layout, e.g., first push all pairs that can use STRD to keep the
19790 stack aligned, and then push all other registers. */
19791 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19792 if (saved_regs_mask & (1 << i))
19793 num_regs++;
19795 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19796 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19797 gcc_assert (num_regs > 0);
19799 /* Create sequence for DWARF info. */
19800 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19802 /* For dwarf info, we generate explicit stack update. */
19803 tmp = gen_rtx_SET (VOIDmode,
19804 stack_pointer_rtx,
19805 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19806 RTX_FRAME_RELATED_P (tmp) = 1;
19807 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19809 /* Save registers. */
19810 offset = - 4 * num_regs;
19811 j = 0;
19812 while (j <= LAST_ARM_REGNUM)
19813 if (saved_regs_mask & (1 << j))
19815 if ((j % 2 == 0)
19816 && (saved_regs_mask & (1 << (j + 1))))
19818 /* The current register and the next register form a register pair
19819 for which STRD can be generated. */
19820 if (offset < 0)
19822 /* Allocate stack space for all saved registers. */
19823 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19824 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19825 mem = gen_frame_mem (DImode, tmp);
19826 offset = 0;
19828 else if (offset > 0)
19829 mem = gen_frame_mem (DImode,
19830 plus_constant (Pmode,
19831 stack_pointer_rtx,
19832 offset));
19833 else
19834 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19836 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19837 RTX_FRAME_RELATED_P (tmp) = 1;
19838 tmp = emit_insn (tmp);
19840 /* Record the first store insn. */
19841 if (dwarf_index == 1)
19842 insn = tmp;
19844 /* Generate dwarf info. */
19845 mem = gen_frame_mem (SImode,
19846 plus_constant (Pmode,
19847 stack_pointer_rtx,
19848 offset));
19849 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19850 RTX_FRAME_RELATED_P (tmp) = 1;
19851 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19853 mem = gen_frame_mem (SImode,
19854 plus_constant (Pmode,
19855 stack_pointer_rtx,
19856 offset + 4));
19857 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19858 RTX_FRAME_RELATED_P (tmp) = 1;
19859 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19861 offset += 8;
19862 j += 2;
19864 else
19866 /* Emit a single word store. */
19867 if (offset < 0)
19869 /* Allocate stack space for all saved registers. */
19870 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19871 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19872 mem = gen_frame_mem (SImode, tmp);
19873 offset = 0;
19875 else if (offset > 0)
19876 mem = gen_frame_mem (SImode,
19877 plus_constant (Pmode,
19878 stack_pointer_rtx,
19879 offset));
19880 else
19881 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19883 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19884 RTX_FRAME_RELATED_P (tmp) = 1;
19885 tmp = emit_insn (tmp);
19887 /* Record the first store insn. */
19888 if (dwarf_index == 1)
19889 insn = tmp;
19891 /* Generate dwarf info. */
19892 mem = gen_frame_mem (SImode,
19893 plus_constant(Pmode,
19894 stack_pointer_rtx,
19895 offset));
19896 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19897 RTX_FRAME_RELATED_P (tmp) = 1;
19898 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19900 offset += 4;
19901 j += 1;
19904 else
19905 j++;
19907 /* Attach dwarf info to the first insn we generate. */
19908 gcc_assert (insn != NULL_RTX);
19909 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19910 RTX_FRAME_RELATED_P (insn) = 1;
19913 /* Generate and emit an insn that we will recognize as a push_multi.
19914 Unfortunately, since this insn does not reflect very well the actual
19915 semantics of the operation, we need to annotate the insn for the benefit
19916 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19917 MASK for registers that should be annotated for DWARF2 frame unwind
19918 information. */
19919 static rtx
19920 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19922 int num_regs = 0;
19923 int num_dwarf_regs = 0;
19924 int i, j;
19925 rtx par;
19926 rtx dwarf;
19927 int dwarf_par_index;
19928 rtx tmp, reg;
19930 /* We don't record the PC in the dwarf frame information. */
19931 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19933 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19935 if (mask & (1 << i))
19936 num_regs++;
19937 if (dwarf_regs_mask & (1 << i))
19938 num_dwarf_regs++;
19941 gcc_assert (num_regs && num_regs <= 16);
19942 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19944 /* For the body of the insn we are going to generate an UNSPEC in
19945 parallel with several USEs. This allows the insn to be recognized
19946 by the push_multi pattern in the arm.md file.
19948 The body of the insn looks something like this:
19950 (parallel [
19951 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19952 (const_int:SI <num>)))
19953 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19954 (use (reg:SI XX))
19955 (use (reg:SI YY))
19959 For the frame note however, we try to be more explicit and actually
19960 show each register being stored into the stack frame, plus a (single)
19961 decrement of the stack pointer. We do it this way in order to be
19962 friendly to the stack unwinding code, which only wants to see a single
19963 stack decrement per instruction. The RTL we generate for the note looks
19964 something like this:
19966 (sequence [
19967 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19968 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19969 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19970 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19974 FIXME:: In an ideal world the PRE_MODIFY would not exist and
19975 instead we'd have a parallel expression detailing all
19976 the stores to the various memory addresses so that debug
19977 information is more up-to-date. Remember however while writing
19978 this to take care of the constraints with the push instruction.
19980 Note also that this has to be taken care of for the VFP registers.
19982 For more see PR43399. */
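/* For illustration, a MASK containing r4, r5 and lr is ultimately emitted
   as a single instruction such as

       push  {r4, r5, lr}          @ stmfd sp!, {r4, r5, lr}

   while the frame note built below records the 12-byte SP decrement and
   each of the three word stores explicitly.  */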
19984 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19985 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19986 dwarf_par_index = 1;
19988 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19990 if (mask & (1 << i))
19992 reg = gen_rtx_REG (SImode, i);
19994 XVECEXP (par, 0, 0)
19995 = gen_rtx_SET (VOIDmode,
19996 gen_frame_mem
19997 (BLKmode,
19998 gen_rtx_PRE_MODIFY (Pmode,
19999 stack_pointer_rtx,
20000 plus_constant
20001 (Pmode, stack_pointer_rtx,
20002 -4 * num_regs))
20004 gen_rtx_UNSPEC (BLKmode,
20005 gen_rtvec (1, reg),
20006 UNSPEC_PUSH_MULT));
20008 if (dwarf_regs_mask & (1 << i))
20010 tmp = gen_rtx_SET (VOIDmode,
20011 gen_frame_mem (SImode, stack_pointer_rtx),
20012 reg);
20013 RTX_FRAME_RELATED_P (tmp) = 1;
20014 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20017 break;
20021 for (j = 1, i++; j < num_regs; i++)
20023 if (mask & (1 << i))
20025 reg = gen_rtx_REG (SImode, i);
20027 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20029 if (dwarf_regs_mask & (1 << i))
20032 = gen_rtx_SET (VOIDmode,
20033 gen_frame_mem
20034 (SImode,
20035 plus_constant (Pmode, stack_pointer_rtx,
20036 4 * j)),
20037 reg);
20038 RTX_FRAME_RELATED_P (tmp) = 1;
20039 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20042 j++;
20046 par = emit_insn (par);
20048 tmp = gen_rtx_SET (VOIDmode,
20049 stack_pointer_rtx,
20050 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20051 RTX_FRAME_RELATED_P (tmp) = 1;
20052 XVECEXP (dwarf, 0, 0) = tmp;
20054 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20056 return par;
20059 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20060 SIZE is the offset to be adjusted.
20061 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20062 static void
20063 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20065 rtx dwarf;
20067 RTX_FRAME_RELATED_P (insn) = 1;
20068 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20069 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20072 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20073 SAVED_REGS_MASK shows which registers need to be restored.
20075 Unfortunately, since this insn does not reflect very well the actual
20076 semantics of the operation, we need to annotate the insn for the benefit
20077 of DWARF2 frame unwind information. */
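/* For illustration, restoring r4, r5 and pc with SAVED_REGS_MASK would
   produce a single instruction such as

       ldmfd  sp!, {r4, r5, pc}    @ pop {r4, r5, pc}

   whose PARALLEL contains the return, the 12-byte SP increment and one
   load per register, plus REG_CFA_RESTORE notes for r4 and r5.  */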
20078 static void
20079 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20081 int num_regs = 0;
20082 int i, j;
20083 rtx par;
20084 rtx dwarf = NULL_RTX;
20085 rtx tmp, reg;
20086 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20087 int offset_adj;
20088 int emit_update;
20090 offset_adj = return_in_pc ? 1 : 0;
20091 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20092 if (saved_regs_mask & (1 << i))
20093 num_regs++;
20095 gcc_assert (num_regs && num_regs <= 16);
20097 /* If SP is in the register list, then we don't emit an SP update insn. */
20098 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20100 /* The parallel needs to hold num_regs SETs
20101 and one SET for the stack update. */
20102 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20104 if (return_in_pc)
20105 XVECEXP (par, 0, 0) = ret_rtx;
20107 if (emit_update)
20109 /* Increment the stack pointer, based on there being
20110 num_regs 4-byte registers to restore. */
20111 tmp = gen_rtx_SET (VOIDmode,
20112 stack_pointer_rtx,
20113 plus_constant (Pmode,
20114 stack_pointer_rtx,
20115 4 * num_regs));
20116 RTX_FRAME_RELATED_P (tmp) = 1;
20117 XVECEXP (par, 0, offset_adj) = tmp;
20120 /* Now restore every reg, which may include PC. */
20121 for (j = 0, i = 0; j < num_regs; i++)
20122 if (saved_regs_mask & (1 << i))
20124 reg = gen_rtx_REG (SImode, i);
20125 if ((num_regs == 1) && emit_update && !return_in_pc)
20127 /* Emit single load with writeback. */
20128 tmp = gen_frame_mem (SImode,
20129 gen_rtx_POST_INC (Pmode,
20130 stack_pointer_rtx));
20131 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20132 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20133 return;
20136 tmp = gen_rtx_SET (VOIDmode,
20137 reg,
20138 gen_frame_mem
20139 (SImode,
20140 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20141 RTX_FRAME_RELATED_P (tmp) = 1;
20142 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20144 /* We need to maintain a sequence for DWARF info too. As dwarf info
20145 should not have PC, skip PC. */
20146 if (i != PC_REGNUM)
20147 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20149 j++;
20152 if (return_in_pc)
20153 par = emit_jump_insn (par);
20154 else
20155 par = emit_insn (par);
20157 REG_NOTES (par) = dwarf;
20158 if (!return_in_pc)
20159 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20160 stack_pointer_rtx, stack_pointer_rtx);
20163 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20164 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20166 Unfortunately, since this insn does not reflect very well the actual
20167 semantics of the operation, we need to annotate the insn for the benefit
20168 of DWARF2 frame unwind information. */
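/* For illustration, restoring d8 and d9 through the stack pointer would
   be emitted as something like

       vldm  sp!, {d8-d9}          @ fldmfdd sp!, {d8, d9}

   together with REG_CFA_RESTORE notes for both registers and a note
   adjusting the CFA by 16 bytes.  */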
20169 static void
20170 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20172 int i, j;
20173 rtx par;
20174 rtx dwarf = NULL_RTX;
20175 rtx tmp, reg;
20177 gcc_assert (num_regs && num_regs <= 32);
20179 /* Workaround ARM10 VFPr1 bug. */
20180 if (num_regs == 2 && !arm_arch6)
20182 if (first_reg == 15)
20183 first_reg--;
20185 num_regs++;
20188 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20189 there could be up to 32 D-registers to restore.
20190 If there are more than 16 D-registers, make two recursive calls,
20191 each of which emits one pop_multi instruction. */
20192 if (num_regs > 16)
20194 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20195 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20196 return;
20199 /* The parallel needs to hold num_regs SETs
20200 and one SET for the stack update. */
20201 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20203 /* Increment the stack pointer, based on there being
20204 num_regs 8-byte registers to restore. */
20205 tmp = gen_rtx_SET (VOIDmode,
20206 base_reg,
20207 plus_constant (Pmode, base_reg, 8 * num_regs));
20208 RTX_FRAME_RELATED_P (tmp) = 1;
20209 XVECEXP (par, 0, 0) = tmp;
20211 /* Now show every reg that will be restored, using a SET for each. */
20212 for (j = 0, i=first_reg; j < num_regs; i += 2)
20214 reg = gen_rtx_REG (DFmode, i);
20216 tmp = gen_rtx_SET (VOIDmode,
20217 reg,
20218 gen_frame_mem
20219 (DFmode,
20220 plus_constant (Pmode, base_reg, 8 * j)));
20221 RTX_FRAME_RELATED_P (tmp) = 1;
20222 XVECEXP (par, 0, j + 1) = tmp;
20224 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20226 j++;
20229 par = emit_insn (par);
20230 REG_NOTES (par) = dwarf;
20232 /* Make sure the CFA doesn't stay tied to IP_REGNUM, to allow unwinding from FP. */
20233 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20235 RTX_FRAME_RELATED_P (par) = 1;
20236 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20238 else
20239 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20240 base_reg, base_reg);
20243 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20244 even number of registers is being popped, multiple LDRD patterns are created for
20245 all register pairs. If an odd number of registers is popped, the last register is
20246 loaded using an LDR pattern. */
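/* For illustration only (assuming r4, r5 and r6 were saved and the
   return is not through PC), the emitted epilogue fragment would be
   roughly:

       ldrd  r4, r5, [sp]      @ pop the aligned pair
       add   sp, sp, #8        @ single stack adjustment
       ldr   r6, [sp], #4      @ odd register: LDR with post-increment  */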
20247 static void
20248 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20250 int num_regs = 0;
20251 int i, j;
20252 rtx par = NULL_RTX;
20253 rtx dwarf = NULL_RTX;
20254 rtx tmp, reg, tmp1;
20255 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20257 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20258 if (saved_regs_mask & (1 << i))
20259 num_regs++;
20261 gcc_assert (num_regs && num_regs <= 16);
20263 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20264 to be popped. So, if num_regs is even, now it will become odd,
20265 and we can generate pop with PC. If num_regs is odd, it will be
20266 even now, and ldr with return can be generated for PC. */
20267 if (return_in_pc)
20268 num_regs--;
20270 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20272 /* Var j iterates over all the registers in saved_regs_mask; var i gives
20273 the index of each saved register in the stack frame.
20274 A PARALLEL RTX for each register pair is created here, so that the
20275 LDRD pattern can be matched. As PC is always the last register to be
20276 popped, and we have already decremented num_regs if PC is present, we
20277 don't have to worry about PC in this loop. */
20278 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20279 if (saved_regs_mask & (1 << j))
20281 /* Create RTX for memory load. */
20282 reg = gen_rtx_REG (SImode, j);
20283 tmp = gen_rtx_SET (SImode,
20284 reg,
20285 gen_frame_mem (SImode,
20286 plus_constant (Pmode,
20287 stack_pointer_rtx, 4 * i)));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20290 if (i % 2 == 0)
20292 /* When saved-register index (i) is even, the RTX to be emitted is
20293 yet to be created. Hence create it first. The LDRD pattern we
20294 are generating is :
20295 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20296 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20297 where target registers need not be consecutive. */
20298 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20299 dwarf = NULL_RTX;
20302 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20303 added as 0th element and if i is odd, reg_i is added as 1st element
20304 of LDRD pattern shown above. */
20305 XVECEXP (par, 0, (i % 2)) = tmp;
20306 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20308 if ((i % 2) == 1)
20310 /* When saved-register index (i) is odd, RTXs for both the registers
20311 to be loaded are generated in above given LDRD pattern, and the
20312 pattern can be emitted now. */
20313 par = emit_insn (par);
20314 REG_NOTES (par) = dwarf;
20315 RTX_FRAME_RELATED_P (par) = 1;
20318 i++;
20321 /* If the number of registers pushed is odd and return_in_pc is false, or the
20322 number of registers is even and return_in_pc is true, the last register is
20323 popped using LDR (it can be PC as well). Hence, adjust the stack first and
20324 then use LDR with post-increment. */
20326 /* Increment the stack pointer, based on there being
20327 num_regs 4-byte registers to restore. */
20328 tmp = gen_rtx_SET (VOIDmode,
20329 stack_pointer_rtx,
20330 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20331 RTX_FRAME_RELATED_P (tmp) = 1;
20332 tmp = emit_insn (tmp);
20333 if (!return_in_pc)
20335 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20336 stack_pointer_rtx, stack_pointer_rtx);
20339 dwarf = NULL_RTX;
20341 if (((num_regs % 2) == 1 && !return_in_pc)
20342 || ((num_regs % 2) == 0 && return_in_pc))
20344 /* Scan for the single register to be popped. Skip until the saved
20345 register is found. */
20346 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20348 /* Gen LDR with post increment here. */
20349 tmp1 = gen_rtx_MEM (SImode,
20350 gen_rtx_POST_INC (SImode,
20351 stack_pointer_rtx));
20352 set_mem_alias_set (tmp1, get_frame_alias_set ());
20354 reg = gen_rtx_REG (SImode, j);
20355 tmp = gen_rtx_SET (SImode, reg, tmp1);
20356 RTX_FRAME_RELATED_P (tmp) = 1;
20357 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20359 if (return_in_pc)
20361 /* If return_in_pc, j must be PC_REGNUM. */
20362 gcc_assert (j == PC_REGNUM);
20363 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20364 XVECEXP (par, 0, 0) = ret_rtx;
20365 XVECEXP (par, 0, 1) = tmp;
20366 par = emit_jump_insn (par);
20368 else
20370 par = emit_insn (tmp);
20371 REG_NOTES (par) = dwarf;
20372 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20373 stack_pointer_rtx, stack_pointer_rtx);
20377 else if ((num_regs % 2) == 1 && return_in_pc)
20379 /* There are 2 registers to be popped. So, generate the pattern
20380 pop_multiple_with_stack_update_and_return to pop in PC. */
20381 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20384 return;
20387 /* LDRD in ARM mode needs consecutive registers as operands. This function
20388 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20389 offset addressing and then generates one separate stack update. This provides
20390 more scheduling freedom, compared to writeback on every load. However,
20391 if the function returns using load into PC directly
20392 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20393 before the last load. TODO: Add a peephole optimization to recognize
20394 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20395 peephole optimization to merge the load at stack-offset zero
20396 with the stack update instruction using load with writeback
20397 in post-index addressing mode. */
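/* For illustration only (assuming r4, r5, r7 and pc were saved), the
   emitted sequence would look roughly like:

       ldrd  r4, r5, [sp]      @ consecutive pair
       ldr   r7, [sp, #8]      @ no partner for r7
       add   sp, sp, #12       @ separate stack update
       ldr   pc, [sp], #4      @ return by loading PC, with writeback  */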
20398 static void
20399 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20401 int j = 0;
20402 int offset = 0;
20403 rtx par = NULL_RTX;
20404 rtx dwarf = NULL_RTX;
20405 rtx tmp, mem;
20407 /* Restore saved registers. */
20408 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20409 j = 0;
20410 while (j <= LAST_ARM_REGNUM)
20411 if (saved_regs_mask & (1 << j))
20413 if ((j % 2) == 0
20414 && (saved_regs_mask & (1 << (j + 1)))
20415 && (j + 1) != PC_REGNUM)
20417 /* Current register and next register form register pair for which
20418 LDRD can be generated. PC is always the last register popped, and
20419 we handle it separately. */
20420 if (offset > 0)
20421 mem = gen_frame_mem (DImode,
20422 plus_constant (Pmode,
20423 stack_pointer_rtx,
20424 offset));
20425 else
20426 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20428 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20429 tmp = emit_insn (tmp);
20430 RTX_FRAME_RELATED_P (tmp) = 1;
20432 /* Generate dwarf info. */
20434 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20435 gen_rtx_REG (SImode, j),
20436 NULL_RTX);
20437 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20438 gen_rtx_REG (SImode, j + 1),
20439 dwarf);
20441 REG_NOTES (tmp) = dwarf;
20443 offset += 8;
20444 j += 2;
20446 else if (j != PC_REGNUM)
20448 /* Emit a single word load. */
20449 if (offset > 0)
20450 mem = gen_frame_mem (SImode,
20451 plus_constant (Pmode,
20452 stack_pointer_rtx,
20453 offset));
20454 else
20455 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20457 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20458 tmp = emit_insn (tmp);
20459 RTX_FRAME_RELATED_P (tmp) = 1;
20461 /* Generate dwarf info. */
20462 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20463 gen_rtx_REG (SImode, j),
20464 NULL_RTX);
20466 offset += 4;
20467 j += 1;
20469 else /* j == PC_REGNUM */
20470 j++;
20472 else
20473 j++;
20475 /* Update the stack. */
20476 if (offset > 0)
20478 tmp = gen_rtx_SET (Pmode,
20479 stack_pointer_rtx,
20480 plus_constant (Pmode,
20481 stack_pointer_rtx,
20482 offset));
20483 tmp = emit_insn (tmp);
20484 arm_add_cfa_adjust_cfa_note (tmp, offset,
20485 stack_pointer_rtx, stack_pointer_rtx);
20486 offset = 0;
20489 if (saved_regs_mask & (1 << PC_REGNUM))
20491 /* Only PC is to be popped. */
20492 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20493 XVECEXP (par, 0, 0) = ret_rtx;
20494 tmp = gen_rtx_SET (SImode,
20495 gen_rtx_REG (SImode, PC_REGNUM),
20496 gen_frame_mem (SImode,
20497 gen_rtx_POST_INC (SImode,
20498 stack_pointer_rtx)));
20499 RTX_FRAME_RELATED_P (tmp) = 1;
20500 XVECEXP (par, 0, 1) = tmp;
20501 par = emit_jump_insn (par);
20503 /* Generate dwarf info. */
20504 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20505 gen_rtx_REG (SImode, PC_REGNUM),
20506 NULL_RTX);
20507 REG_NOTES (par) = dwarf;
20508 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20509 stack_pointer_rtx, stack_pointer_rtx);
20513 /* Calculate the size of the return value that is passed in registers. */
20514 static unsigned
20515 arm_size_return_regs (void)
20517 machine_mode mode;
20519 if (crtl->return_rtx != 0)
20520 mode = GET_MODE (crtl->return_rtx);
20521 else
20522 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20524 return GET_MODE_SIZE (mode);
20527 /* Return true if the current function needs to save/restore LR. */
20528 static bool
20529 thumb_force_lr_save (void)
20531 return !cfun->machine->lr_save_eliminated
20532 && (!leaf_function_p ()
20533 || thumb_far_jump_used_p ()
20534 || df_regs_ever_live_p (LR_REGNUM));
20537 /* We cannot tell whether r3 will be available, because
20538 there is an indirect tail call happening in this
20539 particular case. */
20540 static bool
20541 is_indirect_tailcall_p (rtx call)
20543 rtx pat = PATTERN (call);
20545 /* Indirect tail call. */
20546 pat = XVECEXP (pat, 0, 0);
20547 if (GET_CODE (pat) == SET)
20548 pat = SET_SRC (pat);
20550 pat = XEXP (XEXP (pat, 0), 0);
20551 return REG_P (pat);
20554 /* Return true if r3 is used by any of the tail call insns in the
20555 current function. */
20556 static bool
20557 any_sibcall_could_use_r3 (void)
20559 edge_iterator ei;
20560 edge e;
20562 if (!crtl->tail_call_emit)
20563 return false;
20564 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20565 if (e->flags & EDGE_SIBCALL)
20567 rtx call = BB_END (e->src);
20568 if (!CALL_P (call))
20569 call = prev_nonnote_nondebug_insn (call);
20570 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20571 if (find_regno_fusage (call, USE, 3)
20572 || is_indirect_tailcall_p (call))
20573 return true;
20575 return false;
20579 /* Compute the distance from register FROM to register TO.
20580 These can be the arg pointer (26), the soft frame pointer (25),
20581 the stack pointer (13) or the hard frame pointer (11).
20582 In thumb mode r7 is used as the hard frame pointer, if needed.
20583 Typical stack layout looks like this:
20585 old stack pointer -> | |
20586 ----
20587 | | \
20588 | | saved arguments for
20589 | | vararg functions
20590 | | /
20592 hard FP & arg pointer -> | | \
20593 | | stack
20594 | | frame
20595 | | /
20597 | | \
20598 | | call saved
20599 | | registers
20600 soft frame pointer -> | | /
20602 | | \
20603 | | local
20604 | | variables
20605 locals base pointer -> | | /
20607 | | \
20608 | | outgoing
20609 | | arguments
20610 current stack pointer -> | | /
20613 For a given function some or all of these stack components
20614 may not be needed, giving rise to the possibility of
20615 eliminating some of the registers.
20617 The values returned by this function must reflect the behavior
20618 of arm_expand_prologue() and arm_compute_save_reg_mask().
20620 The sign of the number returned reflects the direction of stack
20621 growth, so the values are positive for all eliminations except
20622 from the soft frame pointer to the hard frame pointer.
20624 SFP may point just inside the local variables block to ensure correct
20625 alignment. */
20628 /* Calculate stack offsets. These are used to calculate register elimination
20629 offsets and in prologue/epilogue code. Also calculates which registers
20630 should be saved. */
20632 static arm_stack_offsets *
20633 arm_get_frame_offsets (void)
20635 struct arm_stack_offsets *offsets;
20636 unsigned long func_type;
20637 int leaf;
20638 int saved;
20639 int core_saved;
20640 HOST_WIDE_INT frame_size;
20641 int i;
20643 offsets = &cfun->machine->stack_offsets;
20645 /* We need to know if we are a leaf function. Unfortunately, it
20646 is possible to be called after start_sequence has been called,
20647 which causes get_insns to return the insns for the sequence,
20648 not the function, which will cause leaf_function_p to return
20649 the incorrect result.
20651 Fortunately, we only need to know about leaf functions once reload has completed, and the
20652 frame size cannot be changed after that time, so we can safely
20653 use the cached value. */
20655 if (reload_completed)
20656 return offsets;
20658 /* Initially this is the size of the local variables. It will be translated
20659 into an offset once we have determined the size of preceding data. */
20660 frame_size = ROUND_UP_WORD (get_frame_size ());
20662 leaf = leaf_function_p ();
20664 /* Space for variadic functions. */
20665 offsets->saved_args = crtl->args.pretend_args_size;
20667 /* In Thumb mode this is incorrect, but never used. */
20668 offsets->frame
20669 = (offsets->saved_args
20670 + arm_compute_static_chain_stack_bytes ()
20671 + (frame_pointer_needed ? 4 : 0));
20673 if (TARGET_32BIT)
20675 unsigned int regno;
20677 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20678 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20679 saved = core_saved;
20681 /* We know that SP will be doubleword aligned on entry, and we must
20682 preserve that condition at any subroutine call. We also require the
20683 soft frame pointer to be doubleword aligned. */
20685 if (TARGET_REALLY_IWMMXT)
20687 /* Check for the call-saved iWMMXt registers. */
20688 for (regno = FIRST_IWMMXT_REGNUM;
20689 regno <= LAST_IWMMXT_REGNUM;
20690 regno++)
20691 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20692 saved += 8;
20695 func_type = arm_current_func_type ();
20696 /* Space for saved VFP registers. */
20697 if (! IS_VOLATILE (func_type)
20698 && TARGET_HARD_FLOAT && TARGET_VFP)
20699 saved += arm_get_vfp_saved_size ();
20701 else /* TARGET_THUMB1 */
20703 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20704 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20705 saved = core_saved;
20706 if (TARGET_BACKTRACE)
20707 saved += 16;
20710 /* Saved registers include the stack frame. */
20711 offsets->saved_regs
20712 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20713 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20715 /* A leaf function does not need any stack alignment if it has nothing
20716 on the stack. */
20717 if (leaf && frame_size == 0
20718 /* However if it calls alloca(), we have a dynamically allocated
20719 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20720 && ! cfun->calls_alloca)
20722 offsets->outgoing_args = offsets->soft_frame;
20723 offsets->locals_base = offsets->soft_frame;
20724 return offsets;
20727 /* Ensure SFP has the correct alignment. */
20728 if (ARM_DOUBLEWORD_ALIGN
20729 && (offsets->soft_frame & 7))
20731 offsets->soft_frame += 4;
20732 /* Try to align stack by pushing an extra reg. Don't bother doing this
20733 when there is a stack frame as the alignment will be rolled into
20734 the normal stack adjustment. */
20735 if (frame_size + crtl->outgoing_args_size == 0)
20737 int reg = -1;
20739 /* Register r3 is caller-saved. Normally it does not need to be
20740 saved on entry by the prologue. However if we choose to save
20741 it for padding then we may confuse the compiler into thinking
20742 a prologue sequence is required when in fact it is not. This
20743 will occur when shrink-wrapping if r3 is used as a scratch
20744 register and there are no other callee-saved writes.
20746 This situation can be avoided when other callee-saved registers
20747 are available and r3 is not mandatory if we choose a callee-saved
20748 register for padding. */
20749 bool prefer_callee_reg_p = false;
20751 /* If it is safe to use r3, then do so. This sometimes
20752 generates better code on Thumb-2 by avoiding the need to
20753 use 32-bit push/pop instructions. */
20754 if (! any_sibcall_could_use_r3 ()
20755 && arm_size_return_regs () <= 12
20756 && (offsets->saved_regs_mask & (1 << 3)) == 0
20757 && (TARGET_THUMB2
20758 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20760 reg = 3;
20761 if (!TARGET_THUMB2)
20762 prefer_callee_reg_p = true;
20764 if (reg == -1
20765 || prefer_callee_reg_p)
20767 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20769 /* Avoid fixed registers; they may be changed at
20770 arbitrary times so it's unsafe to restore them
20771 during the epilogue. */
20772 if (!fixed_regs[i]
20773 && (offsets->saved_regs_mask & (1 << i)) == 0)
20775 reg = i;
20776 break;
20781 if (reg != -1)
20783 offsets->saved_regs += 4;
20784 offsets->saved_regs_mask |= (1 << reg);
20789 offsets->locals_base = offsets->soft_frame + frame_size;
20790 offsets->outgoing_args = (offsets->locals_base
20791 + crtl->outgoing_args_size);
20793 if (ARM_DOUBLEWORD_ALIGN)
20795 /* Ensure SP remains doubleword aligned. */
20796 if (offsets->outgoing_args & 7)
20797 offsets->outgoing_args += 4;
20798 gcc_assert (!(offsets->outgoing_args & 7));
20801 return offsets;
20805 /* Calculate the relative offsets for the different stack pointers. Positive
20806 offsets are in the direction of stack growth. */
20808 HOST_WIDE_INT
20809 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20811 arm_stack_offsets *offsets;
20813 offsets = arm_get_frame_offsets ();
20815 /* OK, now we have enough information to compute the distances.
20816 There must be an entry in these switch tables for each pair
20817 of registers in ELIMINABLE_REGS, even if some of the entries
20818 seem to be redundant or useless. */
20819 switch (from)
20821 case ARG_POINTER_REGNUM:
20822 switch (to)
20824 case THUMB_HARD_FRAME_POINTER_REGNUM:
20825 return 0;
20827 case FRAME_POINTER_REGNUM:
20828 /* This is the reverse of the soft frame pointer
20829 to hard frame pointer elimination below. */
20830 return offsets->soft_frame - offsets->saved_args;
20832 case ARM_HARD_FRAME_POINTER_REGNUM:
20833 /* This is only non-zero in the case where the static chain register
20834 is stored above the frame. */
20835 return offsets->frame - offsets->saved_args - 4;
20837 case STACK_POINTER_REGNUM:
20838 /* If nothing has been pushed on the stack at all
20839 then this will return -4. This *is* correct! */
20840 return offsets->outgoing_args - (offsets->saved_args + 4);
20842 default:
20843 gcc_unreachable ();
20845 gcc_unreachable ();
20847 case FRAME_POINTER_REGNUM:
20848 switch (to)
20850 case THUMB_HARD_FRAME_POINTER_REGNUM:
20851 return 0;
20853 case ARM_HARD_FRAME_POINTER_REGNUM:
20854 /* The hard frame pointer points to the top entry in the
20855 stack frame. The soft frame pointer points to the bottom entry
20856 in the stack frame. If there is no stack frame at all,
20857 then they are identical. */
20859 return offsets->frame - offsets->soft_frame;
20861 case STACK_POINTER_REGNUM:
20862 return offsets->outgoing_args - offsets->soft_frame;
20864 default:
20865 gcc_unreachable ();
20867 gcc_unreachable ();
20869 default:
20870 /* You cannot eliminate from the stack pointer.
20871 In theory you could eliminate from the hard frame
20872 pointer to the stack pointer, but this will never
20873 happen, since if a stack frame is not needed the
20874 hard frame pointer will never be used. */
20875 gcc_unreachable ();
20879 /* Given FROM and TO register numbers, say whether this elimination is
20880 allowed. Frame pointer elimination is automatically handled.
20882 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20883 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20884 pointer, we must eliminate FRAME_POINTER_REGNUM into
20885 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20886 ARG_POINTER_REGNUM. */
20888 bool
20889 arm_can_eliminate (const int from, const int to)
20891 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20892 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20893 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20894 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20895 true);
20898 /* Emit RTL to save coprocessor registers on function entry. Returns the
20899 number of bytes pushed. */
20901 static int
20902 arm_save_coproc_regs(void)
20904 int saved_size = 0;
20905 unsigned reg;
20906 unsigned start_reg;
20907 rtx insn;
20909 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20910 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20912 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20913 insn = gen_rtx_MEM (V2SImode, insn);
20914 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20915 RTX_FRAME_RELATED_P (insn) = 1;
20916 saved_size += 8;
20919 if (TARGET_HARD_FLOAT && TARGET_VFP)
20921 start_reg = FIRST_VFP_REGNUM;
20923 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20925 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20926 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20928 if (start_reg != reg)
20929 saved_size += vfp_emit_fstmd (start_reg,
20930 (reg - start_reg) / 2);
20931 start_reg = reg + 2;
20934 if (start_reg != reg)
20935 saved_size += vfp_emit_fstmd (start_reg,
20936 (reg - start_reg) / 2);
20938 return saved_size;
20942 /* Set the Thumb frame pointer from the stack pointer. */
20944 static void
20945 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20947 HOST_WIDE_INT amount;
20948 rtx insn, dwarf;
20950 amount = offsets->outgoing_args - offsets->locals_base;
20951 if (amount < 1024)
20952 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20953 stack_pointer_rtx, GEN_INT (amount)));
20954 else
20956 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20957 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20958 expects the first two operands to be the same. */
20959 if (TARGET_THUMB2)
20961 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20962 stack_pointer_rtx,
20963 hard_frame_pointer_rtx));
20965 else
20967 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20968 hard_frame_pointer_rtx,
20969 stack_pointer_rtx));
20971 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20972 plus_constant (Pmode, stack_pointer_rtx, amount));
20973 RTX_FRAME_RELATED_P (dwarf) = 1;
20974 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20977 RTX_FRAME_RELATED_P (insn) = 1;
20980 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20981 function. */
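/* For illustration, a typical APCS-frame ARM prologue built by this
   function looks like:

       mov   ip, sp
       stmfd sp!, {fp, ip, lr, pc}
       sub   fp, ip, #4
       sub   sp, sp, #<locals>     @ only if local/outgoing space is needed

   while non-APCS and Thumb-2 prologues are normally just a push of the
   live registers followed by a stack adjustment.  */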
20982 void
20983 arm_expand_prologue (void)
20985 rtx amount;
20986 rtx insn;
20987 rtx ip_rtx;
20988 unsigned long live_regs_mask;
20989 unsigned long func_type;
20990 int fp_offset = 0;
20991 int saved_pretend_args = 0;
20992 int saved_regs = 0;
20993 unsigned HOST_WIDE_INT args_to_push;
20994 arm_stack_offsets *offsets;
20996 func_type = arm_current_func_type ();
20998 /* Naked functions don't have prologues. */
20999 if (IS_NAKED (func_type))
21000 return;
21002 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21003 args_to_push = crtl->args.pretend_args_size;
21005 /* Compute which register we will have to save onto the stack. */
21006 offsets = arm_get_frame_offsets ();
21007 live_regs_mask = offsets->saved_regs_mask;
21009 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21011 if (IS_STACKALIGN (func_type))
21013 rtx r0, r1;
21015 /* Handle a word-aligned stack pointer. We generate the following:
21017 mov r0, sp
21018 bic r1, r0, #7
21019 mov sp, r1
21020 <save and restore r0 in normal prologue/epilogue>
21021 mov sp, r0
21022 bx lr
21024 The unwinder doesn't need to know about the stack realignment.
21025 Just tell it we saved SP in r0. */
21026 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21028 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21029 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21031 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21032 RTX_FRAME_RELATED_P (insn) = 1;
21033 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21035 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21037 /* ??? The CFA changes here, which may cause GDB to conclude that it
21038 has entered a different function. That said, the unwind info is
21039 correct, individually, before and after this instruction because
21040 we've described the save of SP, which will override the default
21041 handling of SP as restoring from the CFA. */
21042 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21045 /* For APCS frames, if the IP register is clobbered
21046 when creating the frame, save that register in a special
21047 way. */
21048 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21050 if (IS_INTERRUPT (func_type))
21052 /* Interrupt functions must not corrupt any registers.
21053 Creating a frame pointer however, corrupts the IP
21054 register, so we must push it first. */
21055 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21057 /* Do not set RTX_FRAME_RELATED_P on this insn.
21058 The dwarf stack unwinding code only wants to see one
21059 stack decrement per function, and this is not it. If
21060 this instruction is labeled as being part of the frame
21061 creation sequence then dwarf2out_frame_debug_expr will
21062 die when it encounters the assignment of IP to FP
21063 later on, since the use of SP here establishes SP as
21064 the CFA register and not IP.
21066 Anyway this instruction is not really part of the stack
21067 frame creation although it is part of the prologue. */
21069 else if (IS_NESTED (func_type))
21071 /* The static chain register is the same as the IP register
21072 used as a scratch register during stack frame creation.
21073 To get around this need to find somewhere to store IP
21074 whilst the frame is being created. We try the following
21075 places in order:
21077 1. The last argument register r3 if it is available.
21078 2. A slot on the stack above the frame if there are no
21079 arguments to push onto the stack.
21080 3. Register r3 again, after pushing the argument registers
21081 onto the stack, if this is a varargs function.
21082 4. The last slot on the stack created for the arguments to
21083 push, if this isn't a varargs function.
21085 Note - we only need to tell the dwarf2 backend about the SP
21086 adjustment in the second variant; the static chain register
21087 doesn't need to be unwound, as it doesn't contain a value
21088 inherited from the caller. */
21090 if (!arm_r3_live_at_start_p ())
21091 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21092 else if (args_to_push == 0)
21094 rtx addr, dwarf;
21096 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21097 saved_regs += 4;
21099 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21100 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21101 fp_offset = 4;
21103 /* Just tell the dwarf backend that we adjusted SP. */
21104 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21105 plus_constant (Pmode, stack_pointer_rtx,
21106 -fp_offset));
21107 RTX_FRAME_RELATED_P (insn) = 1;
21108 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21110 else
21112 /* Store the args on the stack. */
21113 if (cfun->machine->uses_anonymous_args)
21115 insn
21116 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21117 (0xf0 >> (args_to_push / 4)) & 0xf);
21118 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21119 saved_pretend_args = 1;
21121 else
21123 rtx addr, dwarf;
21125 if (args_to_push == 4)
21126 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21127 else
21128 addr
21129 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21130 plus_constant (Pmode,
21131 stack_pointer_rtx,
21132 -args_to_push));
21134 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21136 /* Just tell the dwarf backend that we adjusted SP. */
21137 dwarf
21138 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21139 plus_constant (Pmode, stack_pointer_rtx,
21140 -args_to_push));
21141 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21144 RTX_FRAME_RELATED_P (insn) = 1;
21145 fp_offset = args_to_push;
21146 args_to_push = 0;
21150 insn = emit_set_insn (ip_rtx,
21151 plus_constant (Pmode, stack_pointer_rtx,
21152 fp_offset));
21153 RTX_FRAME_RELATED_P (insn) = 1;
21156 if (args_to_push)
21158 /* Push the argument registers, or reserve space for them. */
21159 if (cfun->machine->uses_anonymous_args)
21160 insn = emit_multi_reg_push
21161 ((0xf0 >> (args_to_push / 4)) & 0xf,
21162 (0xf0 >> (args_to_push / 4)) & 0xf);
21163 else
21164 insn = emit_insn
21165 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21166 GEN_INT (- args_to_push)));
21167 RTX_FRAME_RELATED_P (insn) = 1;
21170 /* If this is an interrupt service routine, and the link register
21171 is going to be pushed, and we're not generating an extra
21172 push of IP (needed when a frame is needed and the frame layout is APCS),
21173 subtracting four from LR now will mean that the function return
21174 can be done with a single instruction. */
21175 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21176 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21177 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21178 && TARGET_ARM)
21180 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21182 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21185 if (live_regs_mask)
21187 unsigned long dwarf_regs_mask = live_regs_mask;
21189 saved_regs += bit_count (live_regs_mask) * 4;
21190 if (optimize_size && !frame_pointer_needed
21191 && saved_regs == offsets->saved_regs - offsets->saved_args)
21193 /* If no coprocessor registers are being pushed and we don't have
21194 to worry about a frame pointer then push extra registers to
21195 create the stack frame. This is done in a way that does not
21196 alter the frame layout, so is independent of the epilogue. */
21197 int n;
21198 int frame;
21199 n = 0;
21200 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21201 n++;
21202 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21203 if (frame && n * 4 >= frame)
21205 n = frame / 4;
21206 live_regs_mask |= (1 << n) - 1;
21207 saved_regs += frame;
21211 if (TARGET_LDRD
21212 && current_tune->prefer_ldrd_strd
21213 && !optimize_function_for_size_p (cfun))
21215 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21216 if (TARGET_THUMB2)
21217 thumb2_emit_strd_push (live_regs_mask);
21218 else if (TARGET_ARM
21219 && !TARGET_APCS_FRAME
21220 && !IS_INTERRUPT (func_type))
21221 arm_emit_strd_push (live_regs_mask);
21222 else
21224 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21225 RTX_FRAME_RELATED_P (insn) = 1;
21228 else
21230 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21231 RTX_FRAME_RELATED_P (insn) = 1;
21235 if (! IS_VOLATILE (func_type))
21236 saved_regs += arm_save_coproc_regs ();
21238 if (frame_pointer_needed && TARGET_ARM)
21240 /* Create the new frame pointer. */
21241 if (TARGET_APCS_FRAME)
21243 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21244 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21245 RTX_FRAME_RELATED_P (insn) = 1;
21247 if (IS_NESTED (func_type))
21249 /* Recover the static chain register. */
21250 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21251 insn = gen_rtx_REG (SImode, 3);
21252 else
21254 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21255 insn = gen_frame_mem (SImode, insn);
21257 emit_set_insn (ip_rtx, insn);
21258 /* Add a USE to stop propagate_one_insn() from barfing. */
21259 emit_insn (gen_force_register_use (ip_rtx));
21262 else
21264 insn = GEN_INT (saved_regs - 4);
21265 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21266 stack_pointer_rtx, insn));
21267 RTX_FRAME_RELATED_P (insn) = 1;
21271 if (flag_stack_usage_info)
21272 current_function_static_stack_size
21273 = offsets->outgoing_args - offsets->saved_args;
21275 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21277 /* This add can produce multiple insns for a large constant, so we
21278 need to get tricky. */
21279 rtx_insn *last = get_last_insn ();
21281 amount = GEN_INT (offsets->saved_args + saved_regs
21282 - offsets->outgoing_args);
21284 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21285 amount));
21288 last = last ? NEXT_INSN (last) : get_insns ();
21289 RTX_FRAME_RELATED_P (last) = 1;
21291 while (last != insn);
21293 /* If the frame pointer is needed, emit a special barrier that
21294 will prevent the scheduler from moving stores to the frame
21295 before the stack adjustment. */
21296 if (frame_pointer_needed)
21297 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21298 hard_frame_pointer_rtx));
21302 if (frame_pointer_needed && TARGET_THUMB2)
21303 thumb_set_frame_pointer (offsets);
21305 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21307 unsigned long mask;
21309 mask = live_regs_mask;
21310 mask &= THUMB2_WORK_REGS;
21311 if (!IS_NESTED (func_type))
21312 mask |= (1 << IP_REGNUM);
21313 arm_load_pic_register (mask);
21316 /* If we are profiling, make sure no instructions are scheduled before
21317 the call to mcount. Similarly if the user has requested no
21318 scheduling in the prolog. Similarly if we want non-call exceptions
21319 using the EABI unwinder, to prevent faulting instructions from being
21320 swapped with a stack adjustment. */
21321 if (crtl->profile || !TARGET_SCHED_PROLOG
21322 || (arm_except_unwind_info (&global_options) == UI_TARGET
21323 && cfun->can_throw_non_call_exceptions))
21324 emit_insn (gen_blockage ());
21326 /* If the link register is being kept alive, with the return address in it,
21327 then make sure that it does not get reused by the ce2 pass. */
21328 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21329 cfun->machine->lr_save_eliminated = 1;
21332 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21333 static void
21334 arm_print_condition (FILE *stream)
21336 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21338 /* Branch conversion is not implemented for Thumb-2. */
21339 if (TARGET_THUMB)
21341 output_operand_lossage ("predicated Thumb instruction");
21342 return;
21344 if (current_insn_predicate != NULL)
21346 output_operand_lossage
21347 ("predicated instruction in conditional sequence");
21348 return;
21351 fputs (arm_condition_codes[arm_current_cc], stream);
21353 else if (current_insn_predicate)
21355 enum arm_cond_code code;
21357 if (TARGET_THUMB1)
21359 output_operand_lossage ("predicated Thumb instruction");
21360 return;
21363 code = get_arm_condition_code (current_insn_predicate);
21364 fputs (arm_condition_codes[code], stream);
21369 /* Globally reserved letters: acln
21370 Punctuation letters currently used: @_|?().!#
21371 Lower case letters currently used: bcdefhimpqtvwxyz
21372 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21373 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21375 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21377 If CODE is 'd', then the X is a condition operand and the instruction
21378 should only be executed if the condition is true.
21379 If CODE is 'D', then the X is a condition operand and the instruction
21380 should only be executed if the condition is false: however, if the mode
21381 of the comparison is CCFPEmode, then always execute the instruction -- we
21382 do this because in these circumstances !GE does not necessarily imply LT;
21383 in these cases the instruction pattern will take care to make sure that
21384 an instruction containing %d will follow, thereby undoing the effects of
21385 doing this instruction unconditionally.
21386 If CODE is 'N' then X is a floating point operand that must be negated
21387 before output.
21388 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21389 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
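/* A few worked examples of the codes above (illustrative sketch, not part
   of the upstream comment; the usual r0-r15 register naming is assumed):
     %B of (const_int 5)           prints -6       (bitwise inverse, sign-extended)
     %L of (const_int 0x12345678)  prints 22136    (low 16 bits, in decimal)
     %M of (reg:DI r4)             prints {r4-r5}  (ldm/stm style register range)
   The exact text depends on the operand's mode and the target flags in
   effect.  */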
21390 static void
21391 arm_print_operand (FILE *stream, rtx x, int code)
21393 switch (code)
21395 case '@':
21396 fputs (ASM_COMMENT_START, stream);
21397 return;
21399 case '_':
21400 fputs (user_label_prefix, stream);
21401 return;
21403 case '|':
21404 fputs (REGISTER_PREFIX, stream);
21405 return;
21407 case '?':
21408 arm_print_condition (stream);
21409 return;
21411 case '(':
21412 /* Nothing in unified syntax, otherwise the current condition code. */
21413 if (!TARGET_UNIFIED_ASM)
21414 arm_print_condition (stream);
21415 break;
21417 case ')':
21418 /* The current condition code in unified syntax, otherwise nothing. */
21419 if (TARGET_UNIFIED_ASM)
21420 arm_print_condition (stream);
21421 break;
21423 case '.':
21424 /* The current condition code for a condition code setting instruction.
21425 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21426 if (TARGET_UNIFIED_ASM)
21428 fputc('s', stream);
21429 arm_print_condition (stream);
21431 else
21433 arm_print_condition (stream);
21434 fputc('s', stream);
21436 return;
21438 case '!':
21439 /* If the instruction is conditionally executed then print
21440 the current condition code, otherwise print 's'. */
21441 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21442 if (current_insn_predicate)
21443 arm_print_condition (stream);
21444 else
21445 fputc('s', stream);
21446 break;
21448 /* %# is a "break" sequence. It doesn't output anything, but is used to
21449 separate e.g. operand numbers from following text, if that text consists
21450 of further digits which we don't want to be part of the operand
21451 number. */
21452 case '#':
21453 return;
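/* For instance, in a hypothetical output template "%0%#1" the trailing
   "1" is emitted as literal text; without the %# break it would be
   parsed as part of the operand number ("%01").  (Example only, not
   taken from an actual pattern in this port.)  */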
21455 case 'N':
21457 REAL_VALUE_TYPE r;
21458 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21459 r = real_value_negate (&r);
21460 fprintf (stream, "%s", fp_const_from_val (&r));
21462 return;
21464 /* An integer or symbol address without a preceding # sign. */
21465 case 'c':
21466 switch (GET_CODE (x))
21468 case CONST_INT:
21469 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21470 break;
21472 case SYMBOL_REF:
21473 output_addr_const (stream, x);
21474 break;
21476 case CONST:
21477 if (GET_CODE (XEXP (x, 0)) == PLUS
21478 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21480 output_addr_const (stream, x);
21481 break;
21483 /* Fall through. */
21485 default:
21486 output_operand_lossage ("Unsupported operand for code '%c'", code);
21488 return;
21490 /* An integer that we want to print in HEX. */
21491 case 'x':
21492 switch (GET_CODE (x))
21494 case CONST_INT:
21495 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21496 break;
21498 default:
21499 output_operand_lossage ("Unsupported operand for code '%c'", code);
21501 return;
21503 case 'B':
21504 if (CONST_INT_P (x))
21506 HOST_WIDE_INT val;
21507 val = ARM_SIGN_EXTEND (~INTVAL (x));
21508 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21510 else
21512 putc ('~', stream);
21513 output_addr_const (stream, x);
21515 return;
21517 case 'b':
21518 /* Print the log2 of a CONST_INT. */
21520 HOST_WIDE_INT val;
21522 if (!CONST_INT_P (x)
21523 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21524 output_operand_lossage ("Unsupported operand for code '%c'", code);
21525 else
21526 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21528 return;
21530 case 'L':
21531 /* The low 16 bits of an immediate constant. */
21532 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21533 return;
21535 case 'i':
21536 fprintf (stream, "%s", arithmetic_instr (x, 1));
21537 return;
21539 case 'I':
21540 fprintf (stream, "%s", arithmetic_instr (x, 0));
21541 return;
21543 case 'S':
21545 HOST_WIDE_INT val;
21546 const char *shift;
21548 shift = shift_op (x, &val);
21550 if (shift)
21552 fprintf (stream, ", %s ", shift);
21553 if (val == -1)
21554 arm_print_operand (stream, XEXP (x, 1), 0);
21555 else
21556 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21559 return;
21561 /* An explanation of the 'Q', 'R' and 'H' register operands:
21563 In a pair of registers containing a DI or DF value the 'Q'
21564 operand returns the register number of the register containing
21565 the least significant part of the value. The 'R' operand returns
21566 the register number of the register containing the most
21567 significant part of the value.
21569 The 'H' operand returns the higher of the two register numbers.
21570 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21571 same as the 'Q' operand, since the most significant part of the
21572 value is held in the lower number register. The reverse is true
21573 on systems where WORDS_BIG_ENDIAN is false.
21575 The purpose of these operands is to distinguish between cases
21576 where the endian-ness of the values is important (for example
21577 when they are added together), and cases where the endian-ness
21578 is irrelevant, but the order of register operations is important.
21579 For example when loading a value from memory into a register
21580 pair, the endian-ness does not matter. Provided that the value
21581 from the lower memory address is put into the lower numbered
21582 register, and the value from the higher address is put into the
21583 higher numbered register, the load will work regardless of whether
21584 the value being loaded is big-wordian or little-wordian. The
21585 order of the two register loads can matter however, if the address
21586 of the memory location is actually held in one of the registers
21587 being overwritten by the load.
21589 The 'Q' and 'R' constraints are also available for 64-bit
21590 constants. */
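/* Illustrative sketch (assumes a DImode value held in the pair r0/r1):
   with WORDS_BIG_ENDIAN false, %Q prints r0 (least significant word),
   %R prints r1 (most significant word) and %H prints r1 (the higher
   register number).  With WORDS_BIG_ENDIAN true, %Q and %H both print
   r1 while %R prints r0.  */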
21591 case 'Q':
21592 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21594 rtx part = gen_lowpart (SImode, x);
21595 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21596 return;
21599 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21601 output_operand_lossage ("invalid operand for code '%c'", code);
21602 return;
21605 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21606 return;
21608 case 'R':
21609 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21611 machine_mode mode = GET_MODE (x);
21612 rtx part;
21614 if (mode == VOIDmode)
21615 mode = DImode;
21616 part = gen_highpart_mode (SImode, mode, x);
21617 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21618 return;
21621 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21623 output_operand_lossage ("invalid operand for code '%c'", code);
21624 return;
21627 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21628 return;
21630 case 'H':
21631 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21633 output_operand_lossage ("invalid operand for code '%c'", code);
21634 return;
21637 asm_fprintf (stream, "%r", REGNO (x) + 1);
21638 return;
21640 case 'J':
21641 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21643 output_operand_lossage ("invalid operand for code '%c'", code);
21644 return;
21647 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21648 return;
21650 case 'K':
21651 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21653 output_operand_lossage ("invalid operand for code '%c'", code);
21654 return;
21657 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21658 return;
21660 case 'm':
21661 asm_fprintf (stream, "%r",
21662 REG_P (XEXP (x, 0))
21663 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21664 return;
21666 case 'M':
21667 asm_fprintf (stream, "{%r-%r}",
21668 REGNO (x),
21669 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21670 return;
21672 /* Like 'M', but writing doubleword vector registers, for use by Neon
21673 insns. */
21674 case 'h':
21676 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21677 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21678 if (numregs == 1)
21679 asm_fprintf (stream, "{d%d}", regno);
21680 else
21681 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21683 return;
21685 case 'd':
21686 /* CONST_TRUE_RTX means always -- that's the default. */
21687 if (x == const_true_rtx)
21688 return;
21690 if (!COMPARISON_P (x))
21692 output_operand_lossage ("invalid operand for code '%c'", code);
21693 return;
21696 fputs (arm_condition_codes[get_arm_condition_code (x)],
21697 stream);
21698 return;
21700 case 'D':
21701 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21702 want to do that. */
21703 if (x == const_true_rtx)
21705 output_operand_lossage ("instruction never executed");
21706 return;
21708 if (!COMPARISON_P (x))
21710 output_operand_lossage ("invalid operand for code '%c'", code);
21711 return;
21714 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21715 (get_arm_condition_code (x))],
21716 stream);
21717 return;
21719 case 's':
21720 case 'V':
21721 case 'W':
21722 case 'X':
21723 case 'Y':
21724 case 'Z':
21725 /* Former Maverick support, removed after GCC-4.7. */
21726 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21727 return;
21729 case 'U':
21730 if (!REG_P (x)
21731 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21732 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21733 /* Bad value for wCG register number. */
21735 output_operand_lossage ("invalid operand for code '%c'", code);
21736 return;
21739 else
21740 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21741 return;
21743 /* Print an iWMMXt control register name. */
21744 case 'w':
21745 if (!CONST_INT_P (x)
21746 || INTVAL (x) < 0
21747 || INTVAL (x) >= 16)
21748 /* Bad value for wC register number. */
21750 output_operand_lossage ("invalid operand for code '%c'", code);
21751 return;
21754 else
21756 static const char * wc_reg_names [16] =
21758 "wCID", "wCon", "wCSSF", "wCASF",
21759 "wC4", "wC5", "wC6", "wC7",
21760 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21761 "wC12", "wC13", "wC14", "wC15"
21764 fputs (wc_reg_names [INTVAL (x)], stream);
21766 return;
21768 /* Print the high single-precision register of a VFP double-precision
21769 register. */
21770 case 'p':
21772 machine_mode mode = GET_MODE (x);
21773 int regno;
21775 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21777 output_operand_lossage ("invalid operand for code '%c'", code);
21778 return;
21781 regno = REGNO (x);
21782 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21784 output_operand_lossage ("invalid operand for code '%c'", code);
21785 return;
21788 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21790 return;
21792 /* Print a VFP/Neon double precision or quad precision register name. */
21793 case 'P':
21794 case 'q':
21796 machine_mode mode = GET_MODE (x);
21797 int is_quad = (code == 'q');
21798 int regno;
21800 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21802 output_operand_lossage ("invalid operand for code '%c'", code);
21803 return;
21806 if (!REG_P (x)
21807 || !IS_VFP_REGNUM (REGNO (x)))
21809 output_operand_lossage ("invalid operand for code '%c'", code);
21810 return;
21813 regno = REGNO (x);
21814 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21815 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21817 output_operand_lossage ("invalid operand for code '%c'", code);
21818 return;
21821 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21822 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21824 return;
21826 /* These two codes print the low/high doubleword register of a Neon quad
21827 register, respectively. For pair-structure types, can also print
21828 low/high quadword registers. */
21829 case 'e':
21830 case 'f':
21832 machine_mode mode = GET_MODE (x);
21833 int regno;
21835 if ((GET_MODE_SIZE (mode) != 16
21836 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21838 output_operand_lossage ("invalid operand for code '%c'", code);
21839 return;
21842 regno = REGNO (x);
21843 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21845 output_operand_lossage ("invalid operand for code '%c'", code);
21846 return;
21849 if (GET_MODE_SIZE (mode) == 16)
21850 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21851 + (code == 'f' ? 1 : 0));
21852 else
21853 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21854 + (code == 'f' ? 1 : 0));
21856 return;
21858 /* Print a VFPv3 floating-point constant, represented as an integer
21859 index. */
21860 case 'G':
21862 int index = vfp3_const_double_index (x);
21863 gcc_assert (index != -1);
21864 fprintf (stream, "%d", index);
21866 return;
21868 /* Print bits representing opcode features for Neon.
21870 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21871 and polynomials as unsigned.
21873 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21875 Bit 2 is 1 for rounding functions, 0 otherwise. */
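/* Worked example (sketch, derived from the bit layout above): an operand
   with INTVAL 5 (binary 101) encodes a signed rounding operation, so %T
   prints 's', %F prints 'i', %t prints 's' and %O prints 'r'.  INTVAL 3
   (binary 011) encodes a float, so %T, %F and %t all print 'f' and %O
   prints nothing.  */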
21877 /* Identify the type as 's', 'u', 'p' or 'f'. */
21878 case 'T':
21880 HOST_WIDE_INT bits = INTVAL (x);
21881 fputc ("uspf"[bits & 3], stream);
21883 return;
21885 /* Likewise, but signed and unsigned integers are both 'i'. */
21886 case 'F':
21888 HOST_WIDE_INT bits = INTVAL (x);
21889 fputc ("iipf"[bits & 3], stream);
21891 return;
21893 /* As for 'T', but emit 'u' instead of 'p'. */
21894 case 't':
21896 HOST_WIDE_INT bits = INTVAL (x);
21897 fputc ("usuf"[bits & 3], stream);
21899 return;
21901 /* Bit 2: rounding (vs none). */
21902 case 'O':
21904 HOST_WIDE_INT bits = INTVAL (x);
21905 fputs ((bits & 4) != 0 ? "r" : "", stream);
21907 return;
21909 /* Memory operand for vld1/vst1 instruction. */
21910 case 'A':
21912 rtx addr;
21913 bool postinc = FALSE;
21914 rtx postinc_reg = NULL;
21915 unsigned align, memsize, align_bits;
21917 gcc_assert (MEM_P (x));
21918 addr = XEXP (x, 0);
21919 if (GET_CODE (addr) == POST_INC)
21921 postinc = 1;
21922 addr = XEXP (addr, 0);
21924 if (GET_CODE (addr) == POST_MODIFY)
21926 postinc_reg = XEXP( XEXP (addr, 1), 1);
21927 addr = XEXP (addr, 0);
21929 asm_fprintf (stream, "[%r", REGNO (addr));
21931 /* We know the alignment of this access, so we can emit a hint in the
21932 instruction (for some alignments) as an aid to the memory subsystem
21933 of the target. */
21934 align = MEM_ALIGN (x) >> 3;
21935 memsize = MEM_SIZE (x);
21937 /* Only certain alignment specifiers are supported by the hardware. */
21938 if (memsize == 32 && (align % 32) == 0)
21939 align_bits = 256;
21940 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21941 align_bits = 128;
21942 else if (memsize >= 8 && (align % 8) == 0)
21943 align_bits = 64;
21944 else
21945 align_bits = 0;
21947 if (align_bits != 0)
21948 asm_fprintf (stream, ":%d", align_bits);
21950 asm_fprintf (stream, "]");
21952 if (postinc)
21953 fputs("!", stream);
21954 if (postinc_reg)
21955 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21957 return;
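/* Sketch of the resulting addressing syntax (base register assumed to be
   r0): a 16-byte access known to be 16-byte aligned prints as "[r0:128]",
   a 32-byte access with 32-byte alignment as "[r0:256]", and a
   post-incremented access additionally gains "!", e.g. "[r0:128]!".  */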
21959 case 'C':
21961 rtx addr;
21963 gcc_assert (MEM_P (x));
21964 addr = XEXP (x, 0);
21965 gcc_assert (REG_P (addr));
21966 asm_fprintf (stream, "[%r]", REGNO (addr));
21968 return;
21970 /* Translate an S register number into a D register number and element index. */
21971 case 'y':
21973 machine_mode mode = GET_MODE (x);
21974 int regno;
21976 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21978 output_operand_lossage ("invalid operand for code '%c'", code);
21979 return;
21982 regno = REGNO (x);
21983 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21985 output_operand_lossage ("invalid operand for code '%c'", code);
21986 return;
21989 regno = regno - FIRST_VFP_REGNUM;
21990 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21992 return;
21994 case 'v':
21995 gcc_assert (CONST_DOUBLE_P (x));
21996 int result;
21997 result = vfp3_const_double_for_fract_bits (x);
21998 if (result == 0)
21999 result = vfp3_const_double_for_bits (x);
22000 fprintf (stream, "#%d", result);
22001 return;
22003 /* Register specifier for vld1.16/vst1.16. Translate the S register
22004 number into a D register number and element index. */
22005 case 'z':
22007 machine_mode mode = GET_MODE (x);
22008 int regno;
22010 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22012 output_operand_lossage ("invalid operand for code '%c'", code);
22013 return;
22016 regno = REGNO (x);
22017 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22019 output_operand_lossage ("invalid operand for code '%c'", code);
22020 return;
22023 regno = regno - FIRST_VFP_REGNUM;
22024 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22026 return;
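/* Worked example (register naming assumed): the single-precision register
   s3 prints as d1[1] under %y and as d1[2] under %z, since each D register
   overlays a pair of S registers.  */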
22028 default:
22029 if (x == 0)
22031 output_operand_lossage ("missing operand");
22032 return;
22035 switch (GET_CODE (x))
22037 case REG:
22038 asm_fprintf (stream, "%r", REGNO (x));
22039 break;
22041 case MEM:
22042 output_memory_reference_mode = GET_MODE (x);
22043 output_address (XEXP (x, 0));
22044 break;
22046 case CONST_DOUBLE:
22048 char fpstr[20];
22049 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22050 sizeof (fpstr), 0, 1);
22051 fprintf (stream, "#%s", fpstr);
22053 break;
22055 default:
22056 gcc_assert (GET_CODE (x) != NEG);
22057 fputc ('#', stream);
22058 if (GET_CODE (x) == HIGH)
22060 fputs (":lower16:", stream);
22061 x = XEXP (x, 0);
22064 output_addr_const (stream, x);
22065 break;
22070 /* Target hook for printing a memory address. */
22071 static void
22072 arm_print_operand_address (FILE *stream, rtx x)
22074 if (TARGET_32BIT)
22076 int is_minus = GET_CODE (x) == MINUS;
22078 if (REG_P (x))
22079 asm_fprintf (stream, "[%r]", REGNO (x));
22080 else if (GET_CODE (x) == PLUS || is_minus)
22082 rtx base = XEXP (x, 0);
22083 rtx index = XEXP (x, 1);
22084 HOST_WIDE_INT offset = 0;
22085 if (!REG_P (base)
22086 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22088 /* Ensure that BASE is a register. */
22089 /* (one of them must be). */
22090 /* Also ensure the SP is not used as an index register. */
22091 std::swap (base, index);
22093 switch (GET_CODE (index))
22095 case CONST_INT:
22096 offset = INTVAL (index);
22097 if (is_minus)
22098 offset = -offset;
22099 asm_fprintf (stream, "[%r, #%wd]",
22100 REGNO (base), offset);
22101 break;
22103 case REG:
22104 asm_fprintf (stream, "[%r, %s%r]",
22105 REGNO (base), is_minus ? "-" : "",
22106 REGNO (index));
22107 break;
22109 case MULT:
22110 case ASHIFTRT:
22111 case LSHIFTRT:
22112 case ASHIFT:
22113 case ROTATERT:
22115 asm_fprintf (stream, "[%r, %s%r",
22116 REGNO (base), is_minus ? "-" : "",
22117 REGNO (XEXP (index, 0)));
22118 arm_print_operand (stream, index, 'S');
22119 fputs ("]", stream);
22120 break;
22123 default:
22124 gcc_unreachable ();
22127 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22128 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22130 extern machine_mode output_memory_reference_mode;
22132 gcc_assert (REG_P (XEXP (x, 0)));
22134 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22135 asm_fprintf (stream, "[%r, #%s%d]!",
22136 REGNO (XEXP (x, 0)),
22137 GET_CODE (x) == PRE_DEC ? "-" : "",
22138 GET_MODE_SIZE (output_memory_reference_mode));
22139 else
22140 asm_fprintf (stream, "[%r], #%s%d",
22141 REGNO (XEXP (x, 0)),
22142 GET_CODE (x) == POST_DEC ? "-" : "",
22143 GET_MODE_SIZE (output_memory_reference_mode));
22145 else if (GET_CODE (x) == PRE_MODIFY)
22147 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22148 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22149 asm_fprintf (stream, "#%wd]!",
22150 INTVAL (XEXP (XEXP (x, 1), 1)));
22151 else
22152 asm_fprintf (stream, "%r]!",
22153 REGNO (XEXP (XEXP (x, 1), 1)));
22155 else if (GET_CODE (x) == POST_MODIFY)
22157 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22158 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22159 asm_fprintf (stream, "#%wd",
22160 INTVAL (XEXP (XEXP (x, 1), 1)));
22161 else
22162 asm_fprintf (stream, "%r",
22163 REGNO (XEXP (XEXP (x, 1), 1)));
22165 else output_addr_const (stream, x);
22167 else
22169 if (REG_P (x))
22170 asm_fprintf (stream, "[%r]", REGNO (x));
22171 else if (GET_CODE (x) == POST_INC)
22172 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22173 else if (GET_CODE (x) == PLUS)
22175 gcc_assert (REG_P (XEXP (x, 0)));
22176 if (CONST_INT_P (XEXP (x, 1)))
22177 asm_fprintf (stream, "[%r, #%wd]",
22178 REGNO (XEXP (x, 0)),
22179 INTVAL (XEXP (x, 1)));
22180 else
22181 asm_fprintf (stream, "[%r, %r]",
22182 REGNO (XEXP (x, 0)),
22183 REGNO (XEXP (x, 1)));
22185 else
22186 output_addr_const (stream, x);
22190 /* Target hook for indicating whether a punctuation character for
22191 TARGET_PRINT_OPERAND is valid. */
22192 static bool
22193 arm_print_operand_punct_valid_p (unsigned char code)
22195 return (code == '@' || code == '|' || code == '.'
22196 || code == '(' || code == ')' || code == '#'
22197 || (TARGET_32BIT && (code == '?'))
22198 || (TARGET_THUMB2 && (code == '!'))
22199 || (TARGET_THUMB && (code == '_')));
22202 /* Target hook for assembling integer objects. The ARM version needs to
22203 handle word-sized values specially. */
22204 static bool
22205 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22207 machine_mode mode;
22209 if (size == UNITS_PER_WORD && aligned_p)
22211 fputs ("\t.word\t", asm_out_file);
22212 output_addr_const (asm_out_file, x);
22214 /* Mark symbols as position independent. We only do this in the
22215 .text segment, not in the .data segment. */
22216 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22217 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22219 /* See legitimize_pic_address for an explanation of the
22220 TARGET_VXWORKS_RTP check. */
22221 if (!arm_pic_data_is_text_relative
22222 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22223 fputs ("(GOT)", asm_out_file);
22224 else
22225 fputs ("(GOTOFF)", asm_out_file);
22227 fputc ('\n', asm_out_file);
22228 return true;
22231 mode = GET_MODE (x);
22233 if (arm_vector_mode_supported_p (mode))
22235 int i, units;
22237 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22239 units = CONST_VECTOR_NUNITS (x);
22240 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22242 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22243 for (i = 0; i < units; i++)
22245 rtx elt = CONST_VECTOR_ELT (x, i);
22246 assemble_integer
22247 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22249 else
22250 for (i = 0; i < units; i++)
22252 rtx elt = CONST_VECTOR_ELT (x, i);
22253 REAL_VALUE_TYPE rval;
22255 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22257 assemble_real
22258 (rval, GET_MODE_INNER (mode),
22259 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22262 return true;
22265 return default_assemble_integer (x, size, aligned_p);
22268 static void
22269 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22271 section *s;
22273 if (!TARGET_AAPCS_BASED)
22275 (is_ctor ?
22276 default_named_section_asm_out_constructor
22277 : default_named_section_asm_out_destructor) (symbol, priority);
22278 return;
22281 /* Put these in the .init_array section, using a special relocation. */
22282 if (priority != DEFAULT_INIT_PRIORITY)
22284 char buf[18];
22285 sprintf (buf, "%s.%.5u",
22286 is_ctor ? ".init_array" : ".fini_array",
22287 priority);
22288 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22290 else if (is_ctor)
22291 s = ctors_section;
22292 else
22293 s = dtors_section;
22295 switch_to_section (s);
22296 assemble_align (POINTER_SIZE);
22297 fputs ("\t.word\t", asm_out_file);
22298 output_addr_const (asm_out_file, symbol);
22299 fputs ("(target1)\n", asm_out_file);
22302 /* Add a function to the list of static constructors. */
22304 static void
22305 arm_elf_asm_constructor (rtx symbol, int priority)
22307 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22310 /* Add a function to the list of static destructors. */
22312 static void
22313 arm_elf_asm_destructor (rtx symbol, int priority)
22315 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22318 /* A finite state machine takes care of noticing whether or not instructions
22319 can be conditionally executed, thus decreasing execution time and code
22320 size by deleting branch instructions. The fsm is controlled by
22321 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22323 /* The states of the fsm controlling condition codes are:
22324 0: normal, do nothing special
22325 1: make ASM_OUTPUT_OPCODE not output this instruction
22326 2: make ASM_OUTPUT_OPCODE not output this instruction
22327 3: make instructions conditional
22328 4: make instructions conditional
22330 State transitions (state->state by whom under condition):
22331 0 -> 1 final_prescan_insn if the `target' is a label
22332 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22333 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22334 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22335 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22336 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22337 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22338 (the target insn is arm_target_insn).
22340 If the jump clobbers the conditions then we use states 2 and 4.
22342 A similar thing can be done with conditional return insns.
22344 XXX In case the `target' is an unconditional branch, this conditionalising
22345 of the instructions always reduces code size, but not always execution
22346 time. But then, I want to reduce the code size to somewhere near what
22347 /bin/cc produces. */
22349 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22350 instructions. When a COND_EXEC instruction is seen the subsequent
22351 instructions are scanned so that multiple conditional instructions can be
22352 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22353 specify the length and true/false mask for the IT block. These will be
22354 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
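/* Illustrative sketch of the effect (the assembly below is assumed output,
   not taken from this file).  In ARM state the fsm can turn

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
     .L1:

   into

       cmp   r0, #0
       addne r1, r1, #1

   by suppressing the branch and conditionalising the skipped insn.  In
   Thumb-2 state, COND_EXEC insns are instead wrapped in an IT block,
   e.g. "it ne" followed by "addne r1, r1, #1".  */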
22356 /* Returns the index of the ARM condition code string in
22357 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22358 COMPARISON should be an rtx like `(eq (...) (...))'. */
22360 enum arm_cond_code
22361 maybe_get_arm_condition_code (rtx comparison)
22363 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22364 enum arm_cond_code code;
22365 enum rtx_code comp_code = GET_CODE (comparison);
22367 if (GET_MODE_CLASS (mode) != MODE_CC)
22368 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22369 XEXP (comparison, 1));
22371 switch (mode)
22373 case CC_DNEmode: code = ARM_NE; goto dominance;
22374 case CC_DEQmode: code = ARM_EQ; goto dominance;
22375 case CC_DGEmode: code = ARM_GE; goto dominance;
22376 case CC_DGTmode: code = ARM_GT; goto dominance;
22377 case CC_DLEmode: code = ARM_LE; goto dominance;
22378 case CC_DLTmode: code = ARM_LT; goto dominance;
22379 case CC_DGEUmode: code = ARM_CS; goto dominance;
22380 case CC_DGTUmode: code = ARM_HI; goto dominance;
22381 case CC_DLEUmode: code = ARM_LS; goto dominance;
22382 case CC_DLTUmode: code = ARM_CC;
22384 dominance:
22385 if (comp_code == EQ)
22386 return ARM_INVERSE_CONDITION_CODE (code);
22387 if (comp_code == NE)
22388 return code;
22389 return ARM_NV;
22391 case CC_NOOVmode:
22392 switch (comp_code)
22394 case NE: return ARM_NE;
22395 case EQ: return ARM_EQ;
22396 case GE: return ARM_PL;
22397 case LT: return ARM_MI;
22398 default: return ARM_NV;
22401 case CC_Zmode:
22402 switch (comp_code)
22404 case NE: return ARM_NE;
22405 case EQ: return ARM_EQ;
22406 default: return ARM_NV;
22409 case CC_Nmode:
22410 switch (comp_code)
22412 case NE: return ARM_MI;
22413 case EQ: return ARM_PL;
22414 default: return ARM_NV;
22417 case CCFPEmode:
22418 case CCFPmode:
22419 /* We can handle all cases except UNEQ and LTGT. */
22420 switch (comp_code)
22422 case GE: return ARM_GE;
22423 case GT: return ARM_GT;
22424 case LE: return ARM_LS;
22425 case LT: return ARM_MI;
22426 case NE: return ARM_NE;
22427 case EQ: return ARM_EQ;
22428 case ORDERED: return ARM_VC;
22429 case UNORDERED: return ARM_VS;
22430 case UNLT: return ARM_LT;
22431 case UNLE: return ARM_LE;
22432 case UNGT: return ARM_HI;
22433 case UNGE: return ARM_PL;
22434 /* UNEQ and LTGT do not have a representation. */
22435 case UNEQ: /* Fall through. */
22436 case LTGT: /* Fall through. */
22437 default: return ARM_NV;
22440 case CC_SWPmode:
22441 switch (comp_code)
22443 case NE: return ARM_NE;
22444 case EQ: return ARM_EQ;
22445 case GE: return ARM_LE;
22446 case GT: return ARM_LT;
22447 case LE: return ARM_GE;
22448 case LT: return ARM_GT;
22449 case GEU: return ARM_LS;
22450 case GTU: return ARM_CC;
22451 case LEU: return ARM_CS;
22452 case LTU: return ARM_HI;
22453 default: return ARM_NV;
22456 case CC_Cmode:
22457 switch (comp_code)
22459 case LTU: return ARM_CS;
22460 case GEU: return ARM_CC;
22461 default: return ARM_NV;
22464 case CC_CZmode:
22465 switch (comp_code)
22467 case NE: return ARM_NE;
22468 case EQ: return ARM_EQ;
22469 case GEU: return ARM_CS;
22470 case GTU: return ARM_HI;
22471 case LEU: return ARM_LS;
22472 case LTU: return ARM_CC;
22473 default: return ARM_NV;
22476 case CC_NCVmode:
22477 switch (comp_code)
22479 case GE: return ARM_GE;
22480 case LT: return ARM_LT;
22481 case GEU: return ARM_CS;
22482 case LTU: return ARM_CC;
22483 default: return ARM_NV;
22486 case CCmode:
22487 switch (comp_code)
22489 case NE: return ARM_NE;
22490 case EQ: return ARM_EQ;
22491 case GE: return ARM_GE;
22492 case GT: return ARM_GT;
22493 case LE: return ARM_LE;
22494 case LT: return ARM_LT;
22495 case GEU: return ARM_CS;
22496 case GTU: return ARM_HI;
22497 case LEU: return ARM_LS;
22498 case LTU: return ARM_CC;
22499 default: return ARM_NV;
22502 default: gcc_unreachable ();
22506 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22507 static enum arm_cond_code
22508 get_arm_condition_code (rtx comparison)
22510 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22511 gcc_assert (code != ARM_NV);
22512 return code;
22515 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22516 instructions. */
22517 void
22518 thumb2_final_prescan_insn (rtx_insn *insn)
22520 rtx_insn *first_insn = insn;
22521 rtx body = PATTERN (insn);
22522 rtx predicate;
22523 enum arm_cond_code code;
22524 int n;
22525 int mask;
22526 int max;
22528 /* max_insns_skipped in the tune was already taken into account in the
22529 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22530 just emit the IT blocks as best we can. It does not make sense to split
22531 the IT blocks. */
22532 max = MAX_INSN_PER_IT_BLOCK;
22534 /* Remove the previous insn from the count of insns to be output. */
22535 if (arm_condexec_count)
22536 arm_condexec_count--;
22538 /* Nothing to do if we are already inside a conditional block. */
22539 if (arm_condexec_count)
22540 return;
22542 if (GET_CODE (body) != COND_EXEC)
22543 return;
22545 /* Conditional jumps are implemented directly. */
22546 if (JUMP_P (insn))
22547 return;
22549 predicate = COND_EXEC_TEST (body);
22550 arm_current_cc = get_arm_condition_code (predicate);
22552 n = get_attr_ce_count (insn);
22553 arm_condexec_count = 1;
22554 arm_condexec_mask = (1 << n) - 1;
22555 arm_condexec_masklen = n;
22556 /* See if subsequent instructions can be combined into the same block. */
22557 for (;;)
22559 insn = next_nonnote_insn (insn);
22561 /* Jumping into the middle of an IT block is illegal, so a label or
22562 barrier terminates the block. */
22563 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22564 break;
22566 body = PATTERN (insn);
22567 /* USE and CLOBBER aren't really insns, so just skip them. */
22568 if (GET_CODE (body) == USE
22569 || GET_CODE (body) == CLOBBER)
22570 continue;
22572 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22573 if (GET_CODE (body) != COND_EXEC)
22574 break;
22575 /* Maximum number of conditionally executed instructions in a block. */
22576 n = get_attr_ce_count (insn);
22577 if (arm_condexec_masklen + n > max)
22578 break;
22580 predicate = COND_EXEC_TEST (body);
22581 code = get_arm_condition_code (predicate);
22582 mask = (1 << n) - 1;
22583 if (arm_current_cc == code)
22584 arm_condexec_mask |= (mask << arm_condexec_masklen);
22585 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22586 break;
22588 arm_condexec_count++;
22589 arm_condexec_masklen += n;
22591 /* A jump must be the last instruction in a conditional block. */
22592 if (JUMP_P (insn))
22593 break;
22595 /* Restore recog_data (getting the attributes of other insns can
22596 destroy this array, but final.c assumes that it remains intact
22597 across this call). */
22598 extract_constrain_insn_cached (first_insn);
22601 void
22602 arm_final_prescan_insn (rtx_insn *insn)
22604 /* BODY will hold the body of INSN. */
22605 rtx body = PATTERN (insn);
22607 /* This will be 1 if trying to repeat the trick, and things need to be
22608 reversed if it appears to fail. */
22609 int reverse = 0;
22611 /* If we start with a return insn, we only succeed if we find another one. */
22612 int seeking_return = 0;
22613 enum rtx_code return_code = UNKNOWN;
22615 /* START_INSN will hold the insn from where we start looking. This is the
22616 first insn after the following code_label if REVERSE is true. */
22617 rtx_insn *start_insn = insn;
22619 /* If in state 4, check if the target branch is reached, in order to
22620 change back to state 0. */
22621 if (arm_ccfsm_state == 4)
22623 if (insn == arm_target_insn)
22625 arm_target_insn = NULL;
22626 arm_ccfsm_state = 0;
22628 return;
22631 /* If in state 3, it is possible to repeat the trick, if this insn is an
22632 unconditional branch to a label, and immediately following this branch
22633 is the previous target label which is only used once, and the label this
22634 branch jumps to is not too far off. */
22635 if (arm_ccfsm_state == 3)
22637 if (simplejump_p (insn))
22639 start_insn = next_nonnote_insn (start_insn);
22640 if (BARRIER_P (start_insn))
22642 /* XXX Isn't this always a barrier? */
22643 start_insn = next_nonnote_insn (start_insn);
22645 if (LABEL_P (start_insn)
22646 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22647 && LABEL_NUSES (start_insn) == 1)
22648 reverse = TRUE;
22649 else
22650 return;
22652 else if (ANY_RETURN_P (body))
22654 start_insn = next_nonnote_insn (start_insn);
22655 if (BARRIER_P (start_insn))
22656 start_insn = next_nonnote_insn (start_insn);
22657 if (LABEL_P (start_insn)
22658 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22659 && LABEL_NUSES (start_insn) == 1)
22661 reverse = TRUE;
22662 seeking_return = 1;
22663 return_code = GET_CODE (body);
22665 else
22666 return;
22668 else
22669 return;
22672 gcc_assert (!arm_ccfsm_state || reverse);
22673 if (!JUMP_P (insn))
22674 return;
22676 /* This jump might be paralleled with a clobber of the condition codes;
22677 the jump should always come first */
22678 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22679 body = XVECEXP (body, 0, 0);
22681 if (reverse
22682 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22683 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22685 int insns_skipped;
22686 int fail = FALSE, succeed = FALSE;
22687 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22688 int then_not_else = TRUE;
22689 rtx_insn *this_insn = start_insn;
22690 rtx label = 0;
22692 /* Register the insn jumped to. */
22693 if (reverse)
22695 if (!seeking_return)
22696 label = XEXP (SET_SRC (body), 0);
22698 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22699 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22700 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22702 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22703 then_not_else = FALSE;
22705 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22707 seeking_return = 1;
22708 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22710 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22712 seeking_return = 1;
22713 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22714 then_not_else = FALSE;
22716 else
22717 gcc_unreachable ();
22719 /* See how many insns this branch skips, and what kind of insns. If all
22720 insns are okay, and the label or unconditional branch to the same
22721 label is not too far away, succeed. */
22722 for (insns_skipped = 0;
22723 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22725 rtx scanbody;
22727 this_insn = next_nonnote_insn (this_insn);
22728 if (!this_insn)
22729 break;
22731 switch (GET_CODE (this_insn))
22733 case CODE_LABEL:
22734 /* Succeed if it is the target label, otherwise fail since
22735 control falls in from somewhere else. */
22736 if (this_insn == label)
22738 arm_ccfsm_state = 1;
22739 succeed = TRUE;
22741 else
22742 fail = TRUE;
22743 break;
22745 case BARRIER:
22746 /* Succeed if the following insn is the target label.
22747 Otherwise fail.
22748 If return insns are used then the last insn in a function
22749 will be a barrier. */
22750 this_insn = next_nonnote_insn (this_insn);
22751 if (this_insn && this_insn == label)
22753 arm_ccfsm_state = 1;
22754 succeed = TRUE;
22756 else
22757 fail = TRUE;
22758 break;
22760 case CALL_INSN:
22761 /* The AAPCS says that conditional calls should not be
22762 used since they make interworking inefficient (the
22763 linker can't transform BL<cond> into BLX). That's
22764 only a problem if the machine has BLX. */
22765 if (arm_arch5)
22767 fail = TRUE;
22768 break;
22771 /* Succeed if the following insn is the target label, or
22772 if the following two insns are a barrier and the
22773 target label. */
22774 this_insn = next_nonnote_insn (this_insn);
22775 if (this_insn && BARRIER_P (this_insn))
22776 this_insn = next_nonnote_insn (this_insn);
22778 if (this_insn && this_insn == label
22779 && insns_skipped < max_insns_skipped)
22781 arm_ccfsm_state = 1;
22782 succeed = TRUE;
22784 else
22785 fail = TRUE;
22786 break;
22788 case JUMP_INSN:
22789 /* If this is an unconditional branch to the same label, succeed.
22790 If it is to another label, do nothing. If it is conditional,
22791 fail. */
22792 /* XXX Probably, the tests for SET and the PC are
22793 unnecessary. */
22795 scanbody = PATTERN (this_insn);
22796 if (GET_CODE (scanbody) == SET
22797 && GET_CODE (SET_DEST (scanbody)) == PC)
22799 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22800 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22802 arm_ccfsm_state = 2;
22803 succeed = TRUE;
22805 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22806 fail = TRUE;
22808 /* Fail if a conditional return is undesirable (e.g. on a
22809 StrongARM), but still allow this if optimizing for size. */
22810 else if (GET_CODE (scanbody) == return_code
22811 && !use_return_insn (TRUE, NULL)
22812 && !optimize_size)
22813 fail = TRUE;
22814 else if (GET_CODE (scanbody) == return_code)
22816 arm_ccfsm_state = 2;
22817 succeed = TRUE;
22819 else if (GET_CODE (scanbody) == PARALLEL)
22821 switch (get_attr_conds (this_insn))
22823 case CONDS_NOCOND:
22824 break;
22825 default:
22826 fail = TRUE;
22827 break;
22830 else
22831 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22833 break;
22835 case INSN:
22836 /* Instructions using or affecting the condition codes make it
22837 fail. */
22838 scanbody = PATTERN (this_insn);
22839 if (!(GET_CODE (scanbody) == SET
22840 || GET_CODE (scanbody) == PARALLEL)
22841 || get_attr_conds (this_insn) != CONDS_NOCOND)
22842 fail = TRUE;
22843 break;
22845 default:
22846 break;
22849 if (succeed)
22851 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22852 arm_target_label = CODE_LABEL_NUMBER (label);
22853 else
22855 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22857 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22859 this_insn = next_nonnote_insn (this_insn);
22860 gcc_assert (!this_insn
22861 || (!BARRIER_P (this_insn)
22862 && !LABEL_P (this_insn)));
22864 if (!this_insn)
22866 /* Oh, dear! We ran off the end... give up. */
22867 extract_constrain_insn_cached (insn);
22868 arm_ccfsm_state = 0;
22869 arm_target_insn = NULL;
22870 return;
22872 arm_target_insn = this_insn;
22875 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22876 what it was. */
22877 if (!reverse)
22878 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22880 if (reverse || then_not_else)
22881 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22884 /* Restore recog_data (getting the attributes of other insns can
22885 destroy this array, but final.c assumes that it remains intact
22886 across this call). */
22887 extract_constrain_insn_cached (insn);
22891 /* Output IT instructions. */
22892 void
22893 thumb2_asm_output_opcode (FILE * stream)
22895 char buff[5];
22896 int n;
22898 if (arm_condexec_mask)
22900 for (n = 0; n < arm_condexec_masklen; n++)
22901 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22902 buff[n] = 0;
22903 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22904 arm_condition_codes[arm_current_cc]);
22905 arm_condexec_mask = 0;
22909 /* Returns true if REGNO is a valid register
22910 for holding a quantity of type MODE. */
22912 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22914 if (GET_MODE_CLASS (mode) == MODE_CC)
22915 return (regno == CC_REGNUM
22916 || (TARGET_HARD_FLOAT && TARGET_VFP
22917 && regno == VFPCC_REGNUM));
22919 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22920 return false;
22922 if (TARGET_THUMB1)
22923 /* For the Thumb we only allow values bigger than SImode in
22924 registers 0 - 6, so that there is always a second low
22925 register available to hold the upper part of the value.
22926 We probably ought to ensure that the register is the
22927 start of an even numbered register pair. */
22928 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22930 if (TARGET_HARD_FLOAT && TARGET_VFP
22931 && IS_VFP_REGNUM (regno))
22933 if (mode == SFmode || mode == SImode)
22934 return VFP_REGNO_OK_FOR_SINGLE (regno);
22936 if (mode == DFmode)
22937 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22939 /* VFP registers can hold HFmode values, but there is no point in
22940 putting them there unless we have hardware conversion insns. */
22941 if (mode == HFmode)
22942 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22944 if (TARGET_NEON)
22945 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22946 || (VALID_NEON_QREG_MODE (mode)
22947 && NEON_REGNO_OK_FOR_QUAD (regno))
22948 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22949 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22950 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22951 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22952 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22954 return FALSE;
22957 if (TARGET_REALLY_IWMMXT)
22959 if (IS_IWMMXT_GR_REGNUM (regno))
22960 return mode == SImode;
22962 if (IS_IWMMXT_REGNUM (regno))
22963 return VALID_IWMMXT_REG_MODE (mode);
22966 /* We allow almost any value to be stored in the general registers.
22967 Restrict doubleword quantities to even register pairs in ARM state
22968 so that we can use ldrd. Do not allow very large Neon structure
22969 opaque modes in general registers; they would use too many. */
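/* Illustrative consequence of the checks below (sketch): in ARM state
   with TARGET_LDRD, a DImode value may start in an even register such
   as r2 (occupying r2/r3) but not in an odd register such as r3, so
   that ldrd/strd remain usable; under TARGET_THUMB2 any starting
   register passing the ARM_NUM_REGS test is accepted.  */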
22970 if (regno <= LAST_ARM_REGNUM)
22972 if (ARM_NUM_REGS (mode) > 4)
22973 return FALSE;
22975 if (TARGET_THUMB2)
22976 return TRUE;
22978 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22981 if (regno == FRAME_POINTER_REGNUM
22982 || regno == ARG_POINTER_REGNUM)
22983 /* We only allow integers in the fake hard registers. */
22984 return GET_MODE_CLASS (mode) == MODE_INT;
22986 return FALSE;
22989 /* Implement MODES_TIEABLE_P. */
22991 bool
22992 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
22994 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22995 return true;
22997 /* We specifically want to allow elements of "structure" modes to
22998 be tieable to the structure. This more general condition allows
22999 other rarer situations too. */
23000 if (TARGET_NEON
23001 && (VALID_NEON_DREG_MODE (mode1)
23002 || VALID_NEON_QREG_MODE (mode1)
23003 || VALID_NEON_STRUCT_MODE (mode1))
23004 && (VALID_NEON_DREG_MODE (mode2)
23005 || VALID_NEON_QREG_MODE (mode2)
23006 || VALID_NEON_STRUCT_MODE (mode2)))
23007 return true;
23009 return false;
23012 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23013 not used in arm mode. */
23015 enum reg_class
23016 arm_regno_class (int regno)
23018 if (regno == PC_REGNUM)
23019 return NO_REGS;
23021 if (TARGET_THUMB1)
23023 if (regno == STACK_POINTER_REGNUM)
23024 return STACK_REG;
23025 if (regno == CC_REGNUM)
23026 return CC_REG;
23027 if (regno < 8)
23028 return LO_REGS;
23029 return HI_REGS;
23032 if (TARGET_THUMB2 && regno < 8)
23033 return LO_REGS;
23035 if ( regno <= LAST_ARM_REGNUM
23036 || regno == FRAME_POINTER_REGNUM
23037 || regno == ARG_POINTER_REGNUM)
23038 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23040 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23041 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23043 if (IS_VFP_REGNUM (regno))
23045 if (regno <= D7_VFP_REGNUM)
23046 return VFP_D0_D7_REGS;
23047 else if (regno <= LAST_LO_VFP_REGNUM)
23048 return VFP_LO_REGS;
23049 else
23050 return VFP_HI_REGS;
23053 if (IS_IWMMXT_REGNUM (regno))
23054 return IWMMXT_REGS;
23056 if (IS_IWMMXT_GR_REGNUM (regno))
23057 return IWMMXT_GR_REGS;
23059 return NO_REGS;
23062 /* Handle a special case when computing the offset
23063 of an argument from the frame pointer. */
23065 arm_debugger_arg_offset (int value, rtx addr)
23067 rtx_insn *insn;
23069 /* We are only interested if dbxout_parms() failed to compute the offset. */
23070 if (value != 0)
23071 return 0;
23073 /* We can only cope with the case where the address is held in a register. */
23074 if (!REG_P (addr))
23075 return 0;
23077 /* If we are using the frame pointer to point at the argument, then
23078 an offset of 0 is correct. */
23079 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23080 return 0;
23082 /* If we are using the stack pointer to point at the
23083 argument, then an offset of 0 is correct. */
23084 /* ??? Check this is consistent with thumb2 frame layout. */
23085 if ((TARGET_THUMB || !frame_pointer_needed)
23086 && REGNO (addr) == SP_REGNUM)
23087 return 0;
23089 /* Oh dear. The argument is pointed to by a register rather
23090 than being held in a register, or being stored at a known
23091 offset from the frame pointer. Since GDB only understands
23092 those two kinds of argument we must translate the address
23093 held in the register into an offset from the frame pointer.
23094 We do this by searching through the insns for the function
23095 looking to see where this register gets its value. If the
23096 register is initialized from the frame pointer plus an offset
23097 then we are in luck and we can continue, otherwise we give up.
23099 This code is exercised by producing debugging information
23100 for a function with arguments like this:
23102 double func (double a, double b, int c, double d) {return d;}
23104 Without this code the stab for parameter 'd' will be set to
23105 an offset of 0 from the frame pointer, rather than 8. */
23107 /* The if() statement says:
23109 If the insn is a normal instruction
23110 and if the insn is setting the value in a register
23111 and if the register being set is the register holding the address of the argument
23112 and if the address is computed by an addition
23113 that involves adding to a register
23114 which is the frame pointer
23115 a constant integer
23117 then... */
23119 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23121 if ( NONJUMP_INSN_P (insn)
23122 && GET_CODE (PATTERN (insn)) == SET
23123 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23124 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23125 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23126 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23127 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23130 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23132 break;
23136 if (value == 0)
23138 debug_rtx (addr);
23139 warning (0, "unable to compute real location of stacked parameter");
23140 value = 8; /* XXX magic hack */
23143 return value;
23146 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23148 static const char *
23149 arm_invalid_parameter_type (const_tree t)
23151 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23152 return N_("function parameters cannot have __fp16 type");
23153 return NULL;
23156 /* Implement TARGET_INVALID_RETURN_TYPE. */
23158 static const char *
23159 arm_invalid_return_type (const_tree t)
23161 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23162 return N_("functions cannot return __fp16 type");
23163 return NULL;
23166 /* Implement TARGET_PROMOTED_TYPE. */
23168 static tree
23169 arm_promoted_type (const_tree t)
23171 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23172 return float_type_node;
23173 return NULL_TREE;
23176 /* Implement TARGET_CONVERT_TO_TYPE.
23177 Specifically, this hook implements the peculiarity of the ARM
23178 half-precision floating-point C semantics that requires conversions between
23179 __fp16 to or from double to do an intermediate conversion to float. */
23181 static tree
23182 arm_convert_to_type (tree type, tree expr)
23184 tree fromtype = TREE_TYPE (expr);
23185 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23186 return NULL_TREE;
23187 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23188 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23189 return convert (type, convert (float_type_node, expr));
23190 return NULL_TREE;
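/* A minimal source-level sketch of the semantics implemented above
   (hypothetical user code; assumes an -mfp16-format setting that makes
   __fp16 available):

     __fp16 h = 1.0;
     double d = h;      // treated as (double)(float)h
     __fp16 g = d;      // treated as (__fp16)(float)d

   Direct conversions between __fp16 and double are rewritten to go
   through an intermediate float, as described in the comment above.  */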
23193 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23194 This simply adds HFmode as a supported mode; even though we don't
23195 implement arithmetic on this type directly, it's supported by
23196 optabs conversions, much the way the double-word arithmetic is
23197 special-cased in the default hook. */
23199 static bool
23200 arm_scalar_mode_supported_p (machine_mode mode)
23202 if (mode == HFmode)
23203 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23204 else if (ALL_FIXED_POINT_MODE_P (mode))
23205 return true;
23206 else
23207 return default_scalar_mode_supported_p (mode);
23210 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23211 void
23212 neon_reinterpret (rtx dest, rtx src)
23214 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23217 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23218 not to early-clobber SRC registers in the process.
23220 We assume that the operands described by SRC and DEST represent a
23221 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23222 number of components into which the copy has been decomposed. */
23223 void
23224 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23226 unsigned int i;
23228 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23229 || REGNO (operands[0]) < REGNO (operands[1]))
23231 for (i = 0; i < count; i++)
23233 operands[2 * i] = dest[i];
23234 operands[2 * i + 1] = src[i];
23237 else
23239 for (i = 0; i < count; i++)
23241 operands[2 * i] = dest[count - i - 1];
23242 operands[2 * i + 1] = src[count - i - 1];
23247 /* Split operands into moves from op[1] + op[2] into op[0]. */
23249 void
23250 neon_split_vcombine (rtx operands[3])
23252 unsigned int dest = REGNO (operands[0]);
23253 unsigned int src1 = REGNO (operands[1]);
23254 unsigned int src2 = REGNO (operands[2]);
23255 machine_mode halfmode = GET_MODE (operands[1]);
23256 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23257 rtx destlo, desthi;
23259 if (src1 == dest && src2 == dest + halfregs)
23261 /* No-op move. Can't split to nothing; emit something. */
23262 emit_note (NOTE_INSN_DELETED);
23263 return;
23266 /* Preserve register attributes for variable tracking. */
23267 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23268 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23269 GET_MODE_SIZE (halfmode));
23271 /* Special case of reversed high/low parts. Use VSWP. */
23272 if (src2 == dest && src1 == dest + halfregs)
23274 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23275 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23276 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23277 return;
23280 if (!reg_overlap_mentioned_p (operands[2], destlo))
23282 /* Try to avoid unnecessary moves if part of the result
23283 is in the right place already. */
23284 if (src1 != dest)
23285 emit_move_insn (destlo, operands[1]);
23286 if (src2 != dest + halfregs)
23287 emit_move_insn (desthi, operands[2]);
23289 else
23291 if (src2 != dest + halfregs)
23292 emit_move_insn (desthi, operands[2]);
23293 if (src1 != dest)
23294 emit_move_insn (destlo, operands[1]);
23298 /* Return the number (counting from 0) of
23299 the least significant set bit in MASK. */
23301 inline static int
23302 number_of_first_bit_set (unsigned mask)
23304 return ctz_hwi (mask);
23307 /* Like emit_multi_reg_push, but allowing for a different set of
23308 registers to be described as saved. MASK is the set of registers
23309 to be saved; REAL_REGS is the set of registers to be described as
23310 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23312 static rtx_insn *
23313 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23315 unsigned long regno;
23316 rtx par[10], tmp, reg;
23317 rtx_insn *insn;
23318 int i, j;
23320 /* Build the parallel of the registers actually being stored. */
23321 for (i = 0; mask; ++i, mask &= mask - 1)
23323 regno = ctz_hwi (mask);
23324 reg = gen_rtx_REG (SImode, regno);
23326 if (i == 0)
23327 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23328 else
23329 tmp = gen_rtx_USE (VOIDmode, reg);
23331 par[i] = tmp;
23334 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23335 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23336 tmp = gen_frame_mem (BLKmode, tmp);
23337 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23338 par[0] = tmp;
23340 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23341 insn = emit_insn (tmp);
23343 /* Always build the stack adjustment note for unwind info. */
23344 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23345 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23346 par[0] = tmp;
23348 /* Build the parallel of the registers recorded as saved for unwind. */
23349 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23351 regno = ctz_hwi (real_regs);
23352 reg = gen_rtx_REG (SImode, regno);
23354 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23355 tmp = gen_frame_mem (SImode, tmp);
23356 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23357 RTX_FRAME_RELATED_P (tmp) = 1;
23358 par[j + 1] = tmp;
23361 if (j == 0)
23362 tmp = par[0];
23363 else
23365 RTX_FRAME_RELATED_P (par[0]) = 1;
23366 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23369 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23371 return insn;
23374 /* Emit code to push or pop registers to or from the stack. F is the
23375 assembly file. MASK is the registers to pop. */
23376 static void
23377 thumb_pop (FILE *f, unsigned long mask)
23379 int regno;
23380 int lo_mask = mask & 0xFF;
23381 int pushed_words = 0;
23383 gcc_assert (mask);
23385 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23387 /* Special case. Do not generate a POP PC statement here, do it in
23388 thumb_exit() */
23389 thumb_exit (f, -1);
23390 return;
23393 fprintf (f, "\tpop\t{");
23395 /* Look at the low registers first. */
23396 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23398 if (lo_mask & 1)
23400 asm_fprintf (f, "%r", regno);
23402 if ((lo_mask & ~1) != 0)
23403 fprintf (f, ", ");
23405 pushed_words++;
23409 if (mask & (1 << PC_REGNUM))
23411 /* Catch popping the PC. */
23412 if (TARGET_INTERWORK || TARGET_BACKTRACE
23413 || crtl->calls_eh_return)
23415 /* The PC is never popped directly; instead
23416 it is popped into r3 and then BX is used. */
23417 fprintf (f, "}\n");
23419 thumb_exit (f, -1);
23421 return;
23423 else
23425 if (mask & 0xFF)
23426 fprintf (f, ", ");
23428 asm_fprintf (f, "%r", PC_REGNUM);
23432 fprintf (f, "}\n");
23435 /* Generate code to return from a thumb function.
23436 If 'reg_containing_return_addr' is -1, then the return address is
23437 actually on the stack, at the stack pointer. */
23438 static void
23439 thumb_exit (FILE *f, int reg_containing_return_addr)
23441 unsigned regs_available_for_popping;
23442 unsigned regs_to_pop;
23443 int pops_needed;
23444 unsigned available;
23445 unsigned required;
23446 machine_mode mode;
23447 int size;
23448 int restore_a4 = FALSE;
23450 /* Compute the registers we need to pop. */
23451 regs_to_pop = 0;
23452 pops_needed = 0;
23454 if (reg_containing_return_addr == -1)
23456 regs_to_pop |= 1 << LR_REGNUM;
23457 ++pops_needed;
23460 if (TARGET_BACKTRACE)
23462 /* Restore the (ARM) frame pointer and stack pointer. */
23463 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23464 pops_needed += 2;
23467 /* If there is nothing to pop then just emit the BX instruction and
23468 return. */
23469 if (pops_needed == 0)
23471 if (crtl->calls_eh_return)
23472 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23474 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23475 return;
23477 /* Otherwise if we are not supporting interworking and we have not created
23478 a backtrace structure and the function was not entered in ARM mode then
23479 just pop the return address straight into the PC. */
23480 else if (!TARGET_INTERWORK
23481 && !TARGET_BACKTRACE
23482 && !is_called_in_ARM_mode (current_function_decl)
23483 && !crtl->calls_eh_return)
23485 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23486 return;
23489 /* Find out how many of the (return) argument registers we can corrupt. */
23490 regs_available_for_popping = 0;
23492 /* If returning via __builtin_eh_return, the bottom three registers
23493 all contain information needed for the return. */
23494 if (crtl->calls_eh_return)
23495 size = 12;
23496 else
23498 /* See if we can deduce the registers used from the function's
23499 return value. This is more reliable than examining
23500 df_regs_ever_live_p () because that will be set if the register is
23501 ever used in the function, not just if the register is used
23502 to hold a return value. */
23504 if (crtl->return_rtx != 0)
23505 mode = GET_MODE (crtl->return_rtx);
23506 else
23507 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23509 size = GET_MODE_SIZE (mode);
23511 if (size == 0)
23513 /* In a void function we can use any argument register.
23514 In a function that returns a structure on the stack
23515 we can use the second and third argument registers. */
23516 if (mode == VOIDmode)
23517 regs_available_for_popping =
23518 (1 << ARG_REGISTER (1))
23519 | (1 << ARG_REGISTER (2))
23520 | (1 << ARG_REGISTER (3));
23521 else
23522 regs_available_for_popping =
23523 (1 << ARG_REGISTER (2))
23524 | (1 << ARG_REGISTER (3));
23526 else if (size <= 4)
23527 regs_available_for_popping =
23528 (1 << ARG_REGISTER (2))
23529 | (1 << ARG_REGISTER (3));
23530 else if (size <= 8)
23531 regs_available_for_popping =
23532 (1 << ARG_REGISTER (3));
23535 /* Match registers to be popped with registers into which we pop them. */
23536 for (available = regs_available_for_popping,
23537 required = regs_to_pop;
23538 required != 0 && available != 0;
23539 available &= ~(available & - available),
23540 required &= ~(required & - required))
23541 -- pops_needed;
23543 /* If we have any popping registers left over, remove them. */
23544 if (available > 0)
23545 regs_available_for_popping &= ~available;
23547 /* Otherwise if we need another popping register we can use
23548 the fourth argument register. */
23549 else if (pops_needed)
23551 /* If we have not found any free argument registers and
23552 reg a4 contains the return address, we must move it. */
23553 if (regs_available_for_popping == 0
23554 && reg_containing_return_addr == LAST_ARG_REGNUM)
23556 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23557 reg_containing_return_addr = LR_REGNUM;
23559 else if (size > 12)
23561 /* Register a4 is being used to hold part of the return value,
23562 but we have dire need of a free, low register. */
23563 restore_a4 = TRUE;
23565 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23568 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23570 /* The fourth argument register is available. */
23571 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23573 --pops_needed;
23577 /* Pop as many registers as we can. */
23578 thumb_pop (f, regs_available_for_popping);
23580 /* Process the registers we popped. */
23581 if (reg_containing_return_addr == -1)
23583 /* The return address was popped into the lowest numbered register. */
23584 regs_to_pop &= ~(1 << LR_REGNUM);
23586 reg_containing_return_addr =
23587 number_of_first_bit_set (regs_available_for_popping);
23589 /* Remove this register from the mask of available registers, so that
23590 the return address will not be corrupted by further pops. */
23591 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23594 /* If we popped other registers then handle them here. */
23595 if (regs_available_for_popping)
23597 int frame_pointer;
23599 /* Work out which register currently contains the frame pointer. */
23600 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23602 /* Move it into the correct place. */
23603 asm_fprintf (f, "\tmov\t%r, %r\n",
23604 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23606 /* (Temporarily) remove it from the mask of popped registers. */
23607 regs_available_for_popping &= ~(1 << frame_pointer);
23608 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23610 if (regs_available_for_popping)
23612 int stack_pointer;
23614 /* We popped the stack pointer as well,
23615 find the register that contains it. */
23616 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23618 /* Move it into the stack register. */
23619 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23621 /* At this point we have popped all necessary registers, so
23622 do not worry about restoring regs_available_for_popping
23623 to its correct value:
23625 assert (pops_needed == 0)
23626 assert (regs_available_for_popping == (1 << frame_pointer))
23627 assert (regs_to_pop == (1 << STACK_POINTER)) */
23629 else
23631 /* Since we have just moved the popped value into the frame
23632 pointer, the popping register is available for reuse, and
23633 we know that we still have the stack pointer left to pop. */
23634 regs_available_for_popping |= (1 << frame_pointer);
23638 /* If we still have registers left on the stack, but we no longer have
23639 any registers into which we can pop them, then we must move the return
23640 address into the link register and make available the register that
23641 contained it. */
23642 if (regs_available_for_popping == 0 && pops_needed > 0)
23644 regs_available_for_popping |= 1 << reg_containing_return_addr;
23646 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23647 reg_containing_return_addr);
23649 reg_containing_return_addr = LR_REGNUM;
23652 /* If we have registers left on the stack then pop some more.
23653 We know that at most we will want to pop FP and SP. */
23654 if (pops_needed > 0)
23656 int popped_into;
23657 int move_to;
23659 thumb_pop (f, regs_available_for_popping);
23661 /* We have popped either FP or SP.
23662 Move whichever one it is into the correct register. */
23663 popped_into = number_of_first_bit_set (regs_available_for_popping);
23664 move_to = number_of_first_bit_set (regs_to_pop);
23666 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23668 regs_to_pop &= ~(1 << move_to);
23670 --pops_needed;
23673 /* If we still have not popped everything then we must have only
23674 had one register available to us and we are now popping the SP. */
23675 if (pops_needed > 0)
23677 int popped_into;
23679 thumb_pop (f, regs_available_for_popping);
23681 popped_into = number_of_first_bit_set (regs_available_for_popping);
23683 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23685 assert (regs_to_pop == (1 << STACK_POINTER))
23686 assert (pops_needed == 1)
23690 /* If necessary restore the a4 register. */
23691 if (restore_a4)
23693 if (reg_containing_return_addr != LR_REGNUM)
23695 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23696 reg_containing_return_addr = LR_REGNUM;
23699 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23702 if (crtl->calls_eh_return)
23703 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23705 /* Return to caller. */
23706 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23709 /* Scan INSN just before assembler is output for it.
23710 For Thumb-1, we track the status of the condition codes; this
23711 information is used in the cbranchsi4_insn pattern. */
23712 void
23713 thumb1_final_prescan_insn (rtx_insn *insn)
23715 if (flag_print_asm_name)
23716 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23717 INSN_ADDRESSES (INSN_UID (insn)));
23718 /* Don't overwrite the previous setter when we get to a cbranch. */
23719 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23721 enum attr_conds conds;
23723 if (cfun->machine->thumb1_cc_insn)
23725 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23726 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23727 CC_STATUS_INIT;
23729 conds = get_attr_conds (insn);
23730 if (conds == CONDS_SET)
23732 rtx set = single_set (insn);
23733 cfun->machine->thumb1_cc_insn = insn;
23734 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23735 cfun->machine->thumb1_cc_op1 = const0_rtx;
23736 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23737 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23739 rtx src1 = XEXP (SET_SRC (set), 1);
23740 if (src1 == const0_rtx)
23741 cfun->machine->thumb1_cc_mode = CCmode;
23743 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23745 /* Record the src register operand instead of dest because the
23746 cprop_hardreg pass propagates the src. */
23747 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23750 else if (conds != CONDS_NOCOND)
23751 cfun->machine->thumb1_cc_insn = NULL_RTX;
23754 /* Check if unexpected far jump is used. */
23755 if (cfun->machine->lr_save_eliminated
23756 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23757 internal_error ("Unexpected thumb1 far jump");
23760 int
23761 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23763 unsigned HOST_WIDE_INT mask = 0xff;
23764 int i;
23766 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23767 if (val == 0) /* XXX */
23768 return 0;
23770 for (i = 0; i < 25; i++)
23771 if ((val & (mask << i)) == val)
23772 return 1;
23774 return 0;
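/* For example, 0x3fc00 (0xff << 10) passes the test above, while 0x101
   does not: its set bits are nine bits apart, so no 8-bit window shifted
   by 0..24 can cover them both.  */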
23777 /* Returns nonzero if the current function contains,
23778 or might contain, a far jump. */
23779 static int
23780 thumb_far_jump_used_p (void)
23782 rtx_insn *insn;
23783 bool far_jump = false;
23784 unsigned int func_size = 0;
23786 /* This test is only important for leaf functions. */
23787 /* assert (!leaf_function_p ()); */
23789 /* If we have already decided that far jumps may be used,
23790 do not bother checking again, and always return true even if
23791 it turns out that they are not being used. Once we have made
23792 the decision that far jumps are present (and that hence the link
23793 register will be pushed onto the stack) we cannot go back on it. */
23794 if (cfun->machine->far_jump_used)
23795 return 1;
23797 /* If this function is not being called from the prologue/epilogue
23798 generation code then it must be being called from the
23799 INITIAL_ELIMINATION_OFFSET macro. */
23800 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23802 /* In this case we know that we are being asked about the elimination
23803 of the arg pointer register. If that register is not being used,
23804 then there are no arguments on the stack, and we do not have to
23805 worry that a far jump might force the prologue to push the link
23806 register, changing the stack offsets. In this case we can just
23807 return false, since the presence of far jumps in the function will
23808 not affect stack offsets.
23810 If the arg pointer is live (or if it was live, but has now been
23811 eliminated and so set to dead) then we do have to test to see if
23812 the function might contain a far jump. This test can lead to some
23813 false negatives, since before reload is completed the length of
23814 branch instructions is not known, so gcc defaults to returning their
23815 longest length, which in turn sets the far jump attribute to true.
23817 A false negative will not result in bad code being generated, but it
23818 will result in a needless push and pop of the link register. We
23819 hope that this does not occur too often.
23821 If we need doubleword stack alignment this could affect the other
23822 elimination offsets so we can't risk getting it wrong. */
23823 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23824 cfun->machine->arg_pointer_live = 1;
23825 else if (!cfun->machine->arg_pointer_live)
23826 return 0;
23829 /* We should not change far_jump_used during or after reload, as there is
23830 no chance to change stack frame layout. */
23831 if (reload_in_progress || reload_completed)
23832 return 0;
23834 /* Check to see if the function contains a branch
23835 insn with the far jump attribute set. */
23836 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23838 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23840 far_jump = true;
23842 func_size += get_attr_length (insn);
23845 /* The far_jump attribute is always true for thumb1 before the
23846 shorten_branch pass, so checking it before shorten_branch is not
23847 very useful.
23849 The following heuristic tries to estimate more accurately whether a far
23850 jump will actually be needed. The heuristic is very conservative, as
23851 there is no way to roll back a decision not to use far jumps.
23853 Thumb1 long branch offset is -2048 to 2046. In the worst case each
23854 2-byte insn is associated with a 4-byte constant pool entry. Using
23855 function size 2048/3 as the threshold is conservative enough. */
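/* Worked example of the threshold: a function whose instructions total
   700 bytes could in the worst case span 700 * 3 = 2100 bytes once
   constant pools are interleaved, beyond the 2046-byte forward reach of
   a Thumb1 branch, so far jumps are assumed; a 600-byte function spans
   at most 1800 bytes and is left alone.  */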
23856 if (far_jump)
23858 if ((func_size * 3) >= 2048)
23860 /* Record the fact that we have decided that
23861 the function does use far jumps. */
23862 cfun->machine->far_jump_used = 1;
23863 return 1;
23867 return 0;
23870 /* Return nonzero if FUNC must be entered in ARM mode. */
23871 int
23872 is_called_in_ARM_mode (tree func)
23874 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23876 /* Ignore the problem of functions whose address is taken. */
23877 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23878 return TRUE;
23880 #ifdef ARM_PE
23881 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23882 #else
23883 return FALSE;
23884 #endif
23887 /* Given the stack offsets and register mask in OFFSETS, decide how
23888 many additional registers to push instead of subtracting a constant
23889 from SP. For epilogues the principle is the same except we use pop.
23890 FOR_PROLOGUE indicates which we're generating. */
23891 static int
23892 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23894 HOST_WIDE_INT amount;
23895 unsigned long live_regs_mask = offsets->saved_regs_mask;
23896 /* Extract a mask of the ones we can give to the Thumb's push/pop
23897 instruction. */
23898 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23899 /* Then count how many other high registers will need to be pushed. */
23900 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23901 int n_free, reg_base, size;
23903 if (!for_prologue && frame_pointer_needed)
23904 amount = offsets->locals_base - offsets->saved_regs;
23905 else
23906 amount = offsets->outgoing_args - offsets->saved_regs;
23908 /* If the stack frame size is 512 exactly, we can save one load
23909 instruction, which should make this a win even when optimizing
23910 for speed. */
23911 if (!optimize_size && amount != 512)
23912 return 0;
23914 /* Can't do this if there are high registers to push. */
23915 if (high_regs_pushed != 0)
23916 return 0;
23918 /* Shouldn't do it in the prologue if no registers would normally
23919 be pushed at all. In the epilogue, also allow it if we'll have
23920 a pop insn for the PC. */
23921 if (l_mask == 0
23922 && (for_prologue
23923 || TARGET_BACKTRACE
23924 || (live_regs_mask & 1 << LR_REGNUM) == 0
23925 || TARGET_INTERWORK
23926 || crtl->args.pretend_args_size != 0))
23927 return 0;
23929 /* Don't do this if thumb_expand_prologue wants to emit instructions
23930 between the push and the stack frame allocation. */
23931 if (for_prologue
23932 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23933 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23934 return 0;
23936 reg_base = 0;
23937 n_free = 0;
23938 if (!for_prologue)
23940 size = arm_size_return_regs ();
23941 reg_base = ARM_NUM_INTS (size);
23942 live_regs_mask >>= reg_base;
23945 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23946 && (for_prologue || call_used_regs[reg_base + n_free]))
23948 live_regs_mask >>= 1;
23949 n_free++;
23952 if (n_free == 0)
23953 return 0;
23954 gcc_assert (amount / 4 * 4 == amount);
23956 if (amount >= 512 && (amount - n_free * 4) < 512)
23957 return (amount - 508) / 4;
23958 if (amount <= n_free * 4)
23959 return amount / 4;
23960 return 0;
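/* Worked example: when optimizing for size, a frame of 520 bytes with
   three free low registers satisfies 520 >= 512 and 520 - 3 * 4 = 508 < 512,
   so we return (520 - 508) / 4 = 3; pushing three extra registers leaves a
   508-byte adjustment, the largest immediate a single Thumb1 "sub sp" can
   encode.  */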
23963 /* The bits which aren't usefully expanded as rtl. */
23964 const char *
23965 thumb1_unexpanded_epilogue (void)
23967 arm_stack_offsets *offsets;
23968 int regno;
23969 unsigned long live_regs_mask = 0;
23970 int high_regs_pushed = 0;
23971 int extra_pop;
23972 int had_to_push_lr;
23973 int size;
23975 if (cfun->machine->return_used_this_function != 0)
23976 return "";
23978 if (IS_NAKED (arm_current_func_type ()))
23979 return "";
23981 offsets = arm_get_frame_offsets ();
23982 live_regs_mask = offsets->saved_regs_mask;
23983 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23985 /* See if we can deduce the registers used from the function's return value.
23986 This is more reliable than examining df_regs_ever_live_p () because that
23987 will be set if the register is ever used in the function, not just if
23988 the register is used to hold a return value. */
23989 size = arm_size_return_regs ();
23991 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23992 if (extra_pop > 0)
23994 unsigned long extra_mask = (1 << extra_pop) - 1;
23995 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23998 /* The prologue may have pushed some high registers to use as
23999 work registers. e.g. the testsuite file:
24000 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24001 compiles to produce:
24002 push {r4, r5, r6, r7, lr}
24003 mov r7, r9
24004 mov r6, r8
24005 push {r6, r7}
24006 as part of the prologue. We have to undo that pushing here. */
24008 if (high_regs_pushed)
24010 unsigned long mask = live_regs_mask & 0xff;
24011 int next_hi_reg;
24013 /* The available low registers depend on the size of the value we are
24014 returning. */
24015 if (size <= 12)
24016 mask |= 1 << 3;
24017 if (size <= 8)
24018 mask |= 1 << 2;
24020 if (mask == 0)
24021 /* Oh dear! We have no low registers into which we can pop
24022 high registers! */
24023 internal_error
24024 ("no low registers available for popping high registers");
24026 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24027 if (live_regs_mask & (1 << next_hi_reg))
24028 break;
24030 while (high_regs_pushed)
24032 /* Find lo register(s) into which the high register(s) can
24033 be popped. */
24034 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24036 if (mask & (1 << regno))
24037 high_regs_pushed--;
24038 if (high_regs_pushed == 0)
24039 break;
24042 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24044 /* Pop the values into the low register(s). */
24045 thumb_pop (asm_out_file, mask);
24047 /* Move the value(s) into the high registers. */
24048 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24050 if (mask & (1 << regno))
24052 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24053 regno);
24055 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24056 if (live_regs_mask & (1 << next_hi_reg))
24057 break;
24061 live_regs_mask &= ~0x0f00;
24064 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24065 live_regs_mask &= 0xff;
24067 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24069 /* Pop the return address into the PC. */
24070 if (had_to_push_lr)
24071 live_regs_mask |= 1 << PC_REGNUM;
24073 /* Either no argument registers were pushed or a backtrace
24074 structure was created which includes an adjusted stack
24075 pointer, so just pop everything. */
24076 if (live_regs_mask)
24077 thumb_pop (asm_out_file, live_regs_mask);
24079 /* We have either just popped the return address into the
24080 PC or it was kept in LR for the entire function.
24081 Note that thumb_pop has already called thumb_exit if the
24082 PC was in the list. */
24083 if (!had_to_push_lr)
24084 thumb_exit (asm_out_file, LR_REGNUM);
24086 else
24088 /* Pop everything but the return address. */
24089 if (live_regs_mask)
24090 thumb_pop (asm_out_file, live_regs_mask);
24092 if (had_to_push_lr)
24094 if (size > 12)
24096 /* We have no free low regs, so save one. */
24097 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24098 LAST_ARG_REGNUM);
24101 /* Get the return address into a temporary register. */
24102 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24104 if (size > 12)
24106 /* Move the return address to lr. */
24107 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24108 LAST_ARG_REGNUM);
24109 /* Restore the low register. */
24110 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24111 IP_REGNUM);
24112 regno = LR_REGNUM;
24114 else
24115 regno = LAST_ARG_REGNUM;
24117 else
24118 regno = LR_REGNUM;
24120 /* Remove the argument registers that were pushed onto the stack. */
24121 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24122 SP_REGNUM, SP_REGNUM,
24123 crtl->args.pretend_args_size);
24125 thumb_exit (asm_out_file, regno);
24128 return "";
24131 /* Functions to save and restore machine-specific function data. */
24132 static struct machine_function *
24133 arm_init_machine_status (void)
24135 struct machine_function *machine;
24136 machine = ggc_cleared_alloc<machine_function> ();
24138 #if ARM_FT_UNKNOWN != 0
24139 machine->func_type = ARM_FT_UNKNOWN;
24140 #endif
24141 return machine;
24144 /* Return an RTX indicating where the return address to the
24145 calling function can be found. */
24146 rtx
24147 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24149 if (count != 0)
24150 return NULL_RTX;
24152 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24155 /* Do anything needed before RTL is emitted for each function. */
24156 void
24157 arm_init_expanders (void)
24159 /* Arrange to initialize and mark the machine per-function status. */
24160 init_machine_status = arm_init_machine_status;
24162 /* This is to stop the combine pass optimizing away the alignment
24163 adjustment of va_arg. */
24164 /* ??? It is claimed that this should not be necessary. */
24165 if (cfun)
24166 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24170 /* Like arm_compute_initial_elimination offset. Simpler because there
24171 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24172 to point at the base of the local variables after static stack
24173 space for a function has been allocated. */
24175 HOST_WIDE_INT
24176 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24178 arm_stack_offsets *offsets;
24180 offsets = arm_get_frame_offsets ();
24182 switch (from)
24184 case ARG_POINTER_REGNUM:
24185 switch (to)
24187 case STACK_POINTER_REGNUM:
24188 return offsets->outgoing_args - offsets->saved_args;
24190 case FRAME_POINTER_REGNUM:
24191 return offsets->soft_frame - offsets->saved_args;
24193 case ARM_HARD_FRAME_POINTER_REGNUM:
24194 return offsets->saved_regs - offsets->saved_args;
24196 case THUMB_HARD_FRAME_POINTER_REGNUM:
24197 return offsets->locals_base - offsets->saved_args;
24199 default:
24200 gcc_unreachable ();
24202 break;
24204 case FRAME_POINTER_REGNUM:
24205 switch (to)
24207 case STACK_POINTER_REGNUM:
24208 return offsets->outgoing_args - offsets->soft_frame;
24210 case ARM_HARD_FRAME_POINTER_REGNUM:
24211 return offsets->saved_regs - offsets->soft_frame;
24213 case THUMB_HARD_FRAME_POINTER_REGNUM:
24214 return offsets->locals_base - offsets->soft_frame;
24216 default:
24217 gcc_unreachable ();
24219 break;
24221 default:
24222 gcc_unreachable ();
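/* Note that each eliminable register above is paired with one field of
   arm_stack_offsets: ARG_POINTER with saved_args, FRAME_POINTER with
   soft_frame, the ARM hard frame pointer with saved_regs, the Thumb hard
   frame pointer with locals_base and STACK_POINTER with outgoing_args;
   every offset returned is just the distance between two such fields.  */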
24226 /* Generate the function's prologue. */
24228 void
24229 thumb1_expand_prologue (void)
24231 rtx_insn *insn;
24233 HOST_WIDE_INT amount;
24234 arm_stack_offsets *offsets;
24235 unsigned long func_type;
24236 int regno;
24237 unsigned long live_regs_mask;
24238 unsigned long l_mask;
24239 unsigned high_regs_pushed = 0;
24241 func_type = arm_current_func_type ();
24243 /* Naked functions don't have prologues. */
24244 if (IS_NAKED (func_type))
24245 return;
24247 if (IS_INTERRUPT (func_type))
24249 error ("interrupt Service Routines cannot be coded in Thumb mode");
24250 return;
24253 if (is_called_in_ARM_mode (current_function_decl))
24254 emit_insn (gen_prologue_thumb1_interwork ());
24256 offsets = arm_get_frame_offsets ();
24257 live_regs_mask = offsets->saved_regs_mask;
24259 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24260 l_mask = live_regs_mask & 0x40ff;
24261 /* Then count how many other high registers will need to be pushed. */
24262 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24264 if (crtl->args.pretend_args_size)
24266 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24268 if (cfun->machine->uses_anonymous_args)
24270 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24271 unsigned long mask;
24273 mask = 1ul << (LAST_ARG_REGNUM + 1);
24274 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24276 insn = thumb1_emit_multi_reg_push (mask, 0);
24278 else
24280 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24281 stack_pointer_rtx, x));
24283 RTX_FRAME_RELATED_P (insn) = 1;
24286 if (TARGET_BACKTRACE)
24288 HOST_WIDE_INT offset = 0;
24289 unsigned work_register;
24290 rtx work_reg, x, arm_hfp_rtx;
24292 /* We have been asked to create a stack backtrace structure.
24293 The code looks like this:
24295 0 .align 2
24296 0 func:
24297 0 sub SP, #16 Reserve space for 4 registers.
24298 2 push {R7} Push low registers.
24299 4 add R7, SP, #20 Get the stack pointer before the push.
24300 6 str R7, [SP, #8] Store the stack pointer
24301 (before reserving the space).
24302 8 mov R7, PC Get hold of the start of this code + 12.
24303 10 str R7, [SP, #16] Store it.
24304 12 mov R7, FP Get hold of the current frame pointer.
24305 14 str R7, [SP, #4] Store it.
24306 16 mov R7, LR Get hold of the current return address.
24307 18 str R7, [SP, #12] Store it.
24308 20 add R7, SP, #16 Point at the start of the
24309 backtrace structure.
24310 22 mov FP, R7 Put this value into the frame pointer. */
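/* Relative to the frame pointer set up at the end of that sequence, the
   backtrace structure is therefore laid out as:
	[FP]		code pointer (the PC value captured above)
	[FP, #-4]	saved LR
	[FP, #-8]	caller's stack pointer
	[FP, #-12]	caller's frame pointer  */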
24312 work_register = thumb_find_work_register (live_regs_mask);
24313 work_reg = gen_rtx_REG (SImode, work_register);
24314 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24316 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24317 stack_pointer_rtx, GEN_INT (-16)));
24318 RTX_FRAME_RELATED_P (insn) = 1;
24320 if (l_mask)
24322 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24323 RTX_FRAME_RELATED_P (insn) = 1;
24325 offset = bit_count (l_mask) * UNITS_PER_WORD;
24328 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24329 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24331 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24332 x = gen_frame_mem (SImode, x);
24333 emit_move_insn (x, work_reg);
24335 /* Make sure that the instruction fetching the PC is in the right place
24336 to calculate "start of backtrace creation code + 12". */
24337 /* ??? The stores using the common WORK_REG ought to be enough to
24338 prevent the scheduler from doing anything weird. Failing that
24339 we could always move all of the following into an UNSPEC_VOLATILE. */
24340 if (l_mask)
24342 x = gen_rtx_REG (SImode, PC_REGNUM);
24343 emit_move_insn (work_reg, x);
24345 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24346 x = gen_frame_mem (SImode, x);
24347 emit_move_insn (x, work_reg);
24349 emit_move_insn (work_reg, arm_hfp_rtx);
24351 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24352 x = gen_frame_mem (SImode, x);
24353 emit_move_insn (x, work_reg);
24355 else
24357 emit_move_insn (work_reg, arm_hfp_rtx);
24359 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24360 x = gen_frame_mem (SImode, x);
24361 emit_move_insn (x, work_reg);
24363 x = gen_rtx_REG (SImode, PC_REGNUM);
24364 emit_move_insn (work_reg, x);
24366 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24367 x = gen_frame_mem (SImode, x);
24368 emit_move_insn (x, work_reg);
24371 x = gen_rtx_REG (SImode, LR_REGNUM);
24372 emit_move_insn (work_reg, x);
24374 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24375 x = gen_frame_mem (SImode, x);
24376 emit_move_insn (x, work_reg);
24378 x = GEN_INT (offset + 12);
24379 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24381 emit_move_insn (arm_hfp_rtx, work_reg);
24383 /* Optimization: If we are not pushing any low registers but we are going
24384 to push some high registers then delay our first push. This will just
24385 be a push of LR and we can combine it with the push of the first high
24386 register. */
24387 else if ((l_mask & 0xff) != 0
24388 || (high_regs_pushed == 0 && l_mask))
24390 unsigned long mask = l_mask;
24391 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24392 insn = thumb1_emit_multi_reg_push (mask, mask);
24393 RTX_FRAME_RELATED_P (insn) = 1;
24396 if (high_regs_pushed)
24398 unsigned pushable_regs;
24399 unsigned next_hi_reg;
24400 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24401 : crtl->args.info.nregs;
24402 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24404 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24405 if (live_regs_mask & (1 << next_hi_reg))
24406 break;
24408 /* Here we need to mask out registers used for passing arguments even
24409 if they can be pushed, to avoid using them to stash the high registers:
24410 such a stash could clobber argument values still live in those registers. */
24411 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24413 if (pushable_regs == 0)
24414 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24416 while (high_regs_pushed > 0)
24418 unsigned long real_regs_mask = 0;
24420 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24422 if (pushable_regs & (1 << regno))
24424 emit_move_insn (gen_rtx_REG (SImode, regno),
24425 gen_rtx_REG (SImode, next_hi_reg));
24427 high_regs_pushed --;
24428 real_regs_mask |= (1 << next_hi_reg);
24430 if (high_regs_pushed)
24432 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24433 next_hi_reg --)
24434 if (live_regs_mask & (1 << next_hi_reg))
24435 break;
24437 else
24439 pushable_regs &= ~((1 << regno) - 1);
24440 break;
24445 /* If we had to find a work register and we have not yet
24446 saved the LR then add it to the list of regs to push. */
24447 if (l_mask == (1 << LR_REGNUM))
24449 pushable_regs |= l_mask;
24450 real_regs_mask |= l_mask;
24451 l_mask = 0;
24454 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24455 RTX_FRAME_RELATED_P (insn) = 1;
24459 /* Load the pic register before setting the frame pointer,
24460 so we can use r7 as a temporary work register. */
24461 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24462 arm_load_pic_register (live_regs_mask);
24464 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24465 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24466 stack_pointer_rtx);
24468 if (flag_stack_usage_info)
24469 current_function_static_stack_size
24470 = offsets->outgoing_args - offsets->saved_args;
24472 amount = offsets->outgoing_args - offsets->saved_regs;
24473 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24474 if (amount)
24476 if (amount < 512)
24478 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24479 GEN_INT (- amount)));
24480 RTX_FRAME_RELATED_P (insn) = 1;
24482 else
24484 rtx reg, dwarf;
24486 /* The stack decrement is too big for an immediate value in a single
24487 insn. In theory we could issue multiple subtracts, but after
24488 three of them it becomes more space efficient to place the full
24489 value in the constant pool and load into a register. (Also the
24490 ARM debugger really likes to see only one stack decrement per
24491 function). So instead we look for a scratch register into which
24492 we can load the decrement, and then we subtract this from the
24493 stack pointer. Unfortunately on the thumb the only available
24494 scratch registers are the argument registers, and we cannot use
24495 these as they may hold arguments to the function. Instead we
24496 attempt to locate a call preserved register which is used by this
24497 function. If we can find one, then we know that it will have
24498 been pushed at the start of the prologue and so we can corrupt
24499 it now. */
24500 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24501 if (live_regs_mask & (1 << regno))
24502 break;
24504 gcc_assert (regno <= LAST_LO_REGNUM);
24506 reg = gen_rtx_REG (SImode, regno);
24508 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24510 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24511 stack_pointer_rtx, reg));
24513 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24514 plus_constant (Pmode, stack_pointer_rtx,
24515 -amount));
24516 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24517 RTX_FRAME_RELATED_P (insn) = 1;
24521 if (frame_pointer_needed)
24522 thumb_set_frame_pointer (offsets);
24524 /* If we are profiling, make sure no instructions are scheduled before
24525 the call to mcount. Similarly if the user has requested no
24526 scheduling in the prolog. Similarly if we want non-call exceptions
24527 using the EABI unwinder, to prevent faulting instructions from being
24528 swapped with a stack adjustment. */
24529 if (crtl->profile || !TARGET_SCHED_PROLOG
24530 || (arm_except_unwind_info (&global_options) == UI_TARGET
24531 && cfun->can_throw_non_call_exceptions))
24532 emit_insn (gen_blockage ());
24534 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24535 if (live_regs_mask & 0xff)
24536 cfun->machine->lr_save_eliminated = 0;
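/* For example, a function that must save r4, r5 and LR and needs 24 bytes
   of locals and outgoing arguments typically gets a prologue of the form:
	push	{r4, r5, lr}
	sub	sp, #24
   with the backtrace, PIC and high-register paths above adding to or
   reordering that basic shape.  */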
24539 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24540 single POP instruction can be generated. LR should be replaced by PC.
24541 All the checks required are already done by USE_RETURN_INSN (). Hence,
24542 all we really need to check here is whether a single register or
24543 multiple registers are to be popped on return. */
24544 void
24545 thumb2_expand_return (bool simple_return)
24547 int i, num_regs;
24548 unsigned long saved_regs_mask;
24549 arm_stack_offsets *offsets;
24551 offsets = arm_get_frame_offsets ();
24552 saved_regs_mask = offsets->saved_regs_mask;
24554 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24555 if (saved_regs_mask & (1 << i))
24556 num_regs++;
24558 if (!simple_return && saved_regs_mask)
24560 if (num_regs == 1)
24562 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24563 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24564 rtx addr = gen_rtx_MEM (SImode,
24565 gen_rtx_POST_INC (SImode,
24566 stack_pointer_rtx));
24567 set_mem_alias_set (addr, get_frame_alias_set ());
24568 XVECEXP (par, 0, 0) = ret_rtx;
24569 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24570 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24571 emit_jump_insn (par);
24573 else
24575 saved_regs_mask &= ~ (1 << LR_REGNUM);
24576 saved_regs_mask |= (1 << PC_REGNUM);
24577 arm_emit_multi_reg_pop (saved_regs_mask);
24580 else
24582 emit_jump_insn (simple_return_rtx);
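/* For example, when only LR was saved this typically assembles to a single
	ldr	pc, [sp], #4
   while several saved registers become one "pop {..., pc}"; the
   simple_return case is normally just "bx lr".  */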
24586 void
24587 thumb1_expand_epilogue (void)
24589 HOST_WIDE_INT amount;
24590 arm_stack_offsets *offsets;
24591 int regno;
24593 /* Naked functions don't have epilogues. */
24594 if (IS_NAKED (arm_current_func_type ()))
24595 return;
24597 offsets = arm_get_frame_offsets ();
24598 amount = offsets->outgoing_args - offsets->saved_regs;
24600 if (frame_pointer_needed)
24602 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24603 amount = offsets->locals_base - offsets->saved_regs;
24605 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24607 gcc_assert (amount >= 0);
24608 if (amount)
24610 emit_insn (gen_blockage ());
24612 if (amount < 512)
24613 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24614 GEN_INT (amount)));
24615 else
24617 /* r3 is always free in the epilogue. */
24618 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24620 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24621 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24625 /* Emit a USE (stack_pointer_rtx), so that
24626 the stack adjustment will not be deleted. */
24627 emit_insn (gen_force_register_use (stack_pointer_rtx));
24629 if (crtl->profile || !TARGET_SCHED_PROLOG)
24630 emit_insn (gen_blockage ());
24632 /* Emit a clobber for each register that will be restored in the epilogue,
24633 so that flow2 will get register lifetimes correct. */
24634 for (regno = 0; regno < 13; regno++)
24635 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24636 emit_clobber (gen_rtx_REG (SImode, regno));
24638 if (! df_regs_ever_live_p (LR_REGNUM))
24639 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24642 /* Epilogue code for APCS frame. */
24643 static void
24644 arm_expand_epilogue_apcs_frame (bool really_return)
24646 unsigned long func_type;
24647 unsigned long saved_regs_mask;
24648 int num_regs = 0;
24649 int i;
24650 int floats_from_frame = 0;
24651 arm_stack_offsets *offsets;
24653 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24654 func_type = arm_current_func_type ();
24656 /* Get frame offsets for ARM. */
24657 offsets = arm_get_frame_offsets ();
24658 saved_regs_mask = offsets->saved_regs_mask;
24660 /* Find the offset of the floating-point save area in the frame. */
24661 floats_from_frame
24662 = (offsets->saved_args
24663 + arm_compute_static_chain_stack_bytes ()
24664 - offsets->frame);
24666 /* Compute how many core registers are saved and how far away the floats are. */
24667 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24668 if (saved_regs_mask & (1 << i))
24670 num_regs++;
24671 floats_from_frame += 4;
24674 if (TARGET_HARD_FLOAT && TARGET_VFP)
24676 int start_reg;
24677 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24679 /* The offset is from IP_REGNUM. */
24680 int saved_size = arm_get_vfp_saved_size ();
24681 if (saved_size > 0)
24683 rtx_insn *insn;
24684 floats_from_frame += saved_size;
24685 insn = emit_insn (gen_addsi3 (ip_rtx,
24686 hard_frame_pointer_rtx,
24687 GEN_INT (-floats_from_frame)));
24688 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24689 ip_rtx, hard_frame_pointer_rtx);
24692 /* Generate VFP register multi-pop. */
24693 start_reg = FIRST_VFP_REGNUM;
24695 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24696 /* Look for a case where a reg does not need restoring. */
24697 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24698 && (!df_regs_ever_live_p (i + 1)
24699 || call_used_regs[i + 1]))
24701 if (start_reg != i)
24702 arm_emit_vfp_multi_reg_pop (start_reg,
24703 (i - start_reg) / 2,
24704 gen_rtx_REG (SImode,
24705 IP_REGNUM));
24706 start_reg = i + 2;
24709 /* Restore the remaining regs that we have discovered (or possibly
24710 even all of them, if the conditional in the for loop never
24711 fired). */
24712 if (start_reg != i)
24713 arm_emit_vfp_multi_reg_pop (start_reg,
24714 (i - start_reg) / 2,
24715 gen_rtx_REG (SImode, IP_REGNUM));
24718 if (TARGET_IWMMXT)
24720 /* The frame pointer is guaranteed to be non-double-word aligned, as
24721 it is set to (double-word-aligned old_stack_pointer) - 4. */
24722 rtx_insn *insn;
24723 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24725 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24726 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24728 rtx addr = gen_frame_mem (V2SImode,
24729 plus_constant (Pmode, hard_frame_pointer_rtx,
24730 - lrm_count * 4));
24731 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24732 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24733 gen_rtx_REG (V2SImode, i),
24734 NULL_RTX);
24735 lrm_count += 2;
24739 /* saved_regs_mask should contain IP, which holds the old stack pointer
24740 saved at the time the frame was created. Since SP and IP are adjacent
24741 registers, we can restore the value directly into SP. */
24742 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24743 saved_regs_mask &= ~(1 << IP_REGNUM);
24744 saved_regs_mask |= (1 << SP_REGNUM);
24746 /* There are two registers left in saved_regs_mask - LR and PC. We
24747 only need to restore LR (the return address), but to
24748 save time we can load it directly into PC, unless we need a
24749 special function exit sequence, or we are not really returning. */
24750 if (really_return
24751 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24752 && !crtl->calls_eh_return)
24753 /* Delete LR from the register mask, so that LR on
24754 the stack is loaded into the PC in the register mask. */
24755 saved_regs_mask &= ~(1 << LR_REGNUM);
24756 else
24757 saved_regs_mask &= ~(1 << PC_REGNUM);
24759 num_regs = bit_count (saved_regs_mask);
24760 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24762 rtx_insn *insn;
24763 emit_insn (gen_blockage ());
24764 /* Unwind the stack to just below the saved registers. */
24765 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24766 hard_frame_pointer_rtx,
24767 GEN_INT (- 4 * num_regs)));
24769 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24770 stack_pointer_rtx, hard_frame_pointer_rtx);
24773 arm_emit_multi_reg_pop (saved_regs_mask);
24775 if (IS_INTERRUPT (func_type))
24777 /* Interrupt handlers will have pushed the
24778 IP onto the stack, so restore it now. */
24779 rtx_insn *insn;
24780 rtx addr = gen_rtx_MEM (SImode,
24781 gen_rtx_POST_INC (SImode,
24782 stack_pointer_rtx));
24783 set_mem_alias_set (addr, get_frame_alias_set ());
24784 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24785 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24786 gen_rtx_REG (SImode, IP_REGNUM),
24787 NULL_RTX);
24790 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24791 return;
24793 if (crtl->calls_eh_return)
24794 emit_insn (gen_addsi3 (stack_pointer_rtx,
24795 stack_pointer_rtx,
24796 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24798 if (IS_STACKALIGN (func_type))
24799 /* Restore the original stack pointer. Before prologue, the stack was
24800 realigned and the original stack pointer saved in r0. For details,
24801 see comment in arm_expand_prologue. */
24802 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24804 emit_jump_insn (simple_return_rtx);
24807 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24808 function is not a sibcall. */
24809 void
24810 arm_expand_epilogue (bool really_return)
24812 unsigned long func_type;
24813 unsigned long saved_regs_mask;
24814 int num_regs = 0;
24815 int i;
24816 int amount;
24817 arm_stack_offsets *offsets;
24819 func_type = arm_current_func_type ();
24821 /* Naked functions don't have epilogues. Hence, generate a return pattern
24822 and let output_return_instruction take care of any instruction emission. */
24823 if (IS_NAKED (func_type)
24824 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24826 if (really_return)
24827 emit_jump_insn (simple_return_rtx);
24828 return;
24831 /* If we are throwing an exception, then we really must be doing a
24832 return, so we can't tail-call. */
24833 gcc_assert (!crtl->calls_eh_return || really_return);
24835 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24837 arm_expand_epilogue_apcs_frame (really_return);
24838 return;
24841 /* Get frame offsets for ARM. */
24842 offsets = arm_get_frame_offsets ();
24843 saved_regs_mask = offsets->saved_regs_mask;
24844 num_regs = bit_count (saved_regs_mask);
24846 if (frame_pointer_needed)
24848 rtx_insn *insn;
24849 /* Restore stack pointer if necessary. */
24850 if (TARGET_ARM)
24852 /* In ARM mode, the frame pointer points to the first saved register.
24853 Restore the stack pointer to the last saved register. */
24854 amount = offsets->frame - offsets->saved_regs;
24856 /* Force out any pending memory operations that reference stacked data
24857 before stack de-allocation occurs. */
24858 emit_insn (gen_blockage ());
24859 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24860 hard_frame_pointer_rtx,
24861 GEN_INT (amount)));
24862 arm_add_cfa_adjust_cfa_note (insn, amount,
24863 stack_pointer_rtx,
24864 hard_frame_pointer_rtx);
24866 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24867 deleted. */
24868 emit_insn (gen_force_register_use (stack_pointer_rtx));
24870 else
24872 /* In Thumb-2 mode, the frame pointer points to the last saved
24873 register. */
24874 amount = offsets->locals_base - offsets->saved_regs;
24875 if (amount)
24877 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24878 hard_frame_pointer_rtx,
24879 GEN_INT (amount)));
24880 arm_add_cfa_adjust_cfa_note (insn, amount,
24881 hard_frame_pointer_rtx,
24882 hard_frame_pointer_rtx);
24885 /* Force out any pending memory operations that reference stacked data
24886 before stack de-allocation occurs. */
24887 emit_insn (gen_blockage ());
24888 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24889 hard_frame_pointer_rtx));
24890 arm_add_cfa_adjust_cfa_note (insn, 0,
24891 stack_pointer_rtx,
24892 hard_frame_pointer_rtx);
24893 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24894 deleted. */
24895 emit_insn (gen_force_register_use (stack_pointer_rtx));
24898 else
24900 /* Pop off outgoing args and local frame to adjust stack pointer to
24901 last saved register. */
24902 amount = offsets->outgoing_args - offsets->saved_regs;
24903 if (amount)
24905 rtx_insn *tmp;
24906 /* Force out any pending memory operations that reference stacked data
24907 before stack de-allocation occurs. */
24908 emit_insn (gen_blockage ());
24909 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24910 stack_pointer_rtx,
24911 GEN_INT (amount)));
24912 arm_add_cfa_adjust_cfa_note (tmp, amount,
24913 stack_pointer_rtx, stack_pointer_rtx);
24914 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24915 not deleted. */
24916 emit_insn (gen_force_register_use (stack_pointer_rtx));
24920 if (TARGET_HARD_FLOAT && TARGET_VFP)
24922 /* Generate VFP register multi-pop. */
24923 int end_reg = LAST_VFP_REGNUM + 1;
24925 /* Scan the registers in reverse order. We need to match
24926 any groupings made in the prologue and generate matching
24927 vldm operations. We need to match the groups because, unlike pop,
24928 vldm can only restore consecutive registers. */
24929 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24930 /* Look for a case where a reg does not need restoring. */
24931 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24932 && (!df_regs_ever_live_p (i + 1)
24933 || call_used_regs[i + 1]))
24935 /* Restore the regs discovered so far (from reg+2 to
24936 end_reg). */
24937 if (end_reg > i + 2)
24938 arm_emit_vfp_multi_reg_pop (i + 2,
24939 (end_reg - (i + 2)) / 2,
24940 stack_pointer_rtx);
24941 end_reg = i;
24944 /* Restore the remaining regs that we have discovered (or possibly
24945 even all of them, if the conditional in the for loop never
24946 fired). */
24947 if (end_reg > i + 2)
24948 arm_emit_vfp_multi_reg_pop (i + 2,
24949 (end_reg - (i + 2)) / 2,
24950 stack_pointer_rtx);
24953 if (TARGET_IWMMXT)
24954 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24955 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24957 rtx_insn *insn;
24958 rtx addr = gen_rtx_MEM (V2SImode,
24959 gen_rtx_POST_INC (SImode,
24960 stack_pointer_rtx));
24961 set_mem_alias_set (addr, get_frame_alias_set ());
24962 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24963 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24964 gen_rtx_REG (V2SImode, i),
24965 NULL_RTX);
24966 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24967 stack_pointer_rtx, stack_pointer_rtx);
24970 if (saved_regs_mask)
24972 rtx insn;
24973 bool return_in_pc = false;
24975 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24976 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24977 && !IS_STACKALIGN (func_type)
24978 && really_return
24979 && crtl->args.pretend_args_size == 0
24980 && saved_regs_mask & (1 << LR_REGNUM)
24981 && !crtl->calls_eh_return)
24983 saved_regs_mask &= ~(1 << LR_REGNUM);
24984 saved_regs_mask |= (1 << PC_REGNUM);
24985 return_in_pc = true;
24988 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24990 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24991 if (saved_regs_mask & (1 << i))
24993 rtx addr = gen_rtx_MEM (SImode,
24994 gen_rtx_POST_INC (SImode,
24995 stack_pointer_rtx));
24996 set_mem_alias_set (addr, get_frame_alias_set ());
24998 if (i == PC_REGNUM)
25000 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25001 XVECEXP (insn, 0, 0) = ret_rtx;
25002 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25003 gen_rtx_REG (SImode, i),
25004 addr);
25005 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25006 insn = emit_jump_insn (insn);
25008 else
25010 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25011 addr));
25012 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25013 gen_rtx_REG (SImode, i),
25014 NULL_RTX);
25015 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25016 stack_pointer_rtx,
25017 stack_pointer_rtx);
25021 else
25023 if (TARGET_LDRD
25024 && current_tune->prefer_ldrd_strd
25025 && !optimize_function_for_size_p (cfun))
25027 if (TARGET_THUMB2)
25028 thumb2_emit_ldrd_pop (saved_regs_mask);
25029 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25030 arm_emit_ldrd_pop (saved_regs_mask);
25031 else
25032 arm_emit_multi_reg_pop (saved_regs_mask);
25034 else
25035 arm_emit_multi_reg_pop (saved_regs_mask);
25038 if (return_in_pc)
25039 return;
25042 if (crtl->args.pretend_args_size)
25044 int i, j;
25045 rtx dwarf = NULL_RTX;
25046 rtx_insn *tmp =
25047 emit_insn (gen_addsi3 (stack_pointer_rtx,
25048 stack_pointer_rtx,
25049 GEN_INT (crtl->args.pretend_args_size)));
25051 RTX_FRAME_RELATED_P (tmp) = 1;
25053 if (cfun->machine->uses_anonymous_args)
25055 /* Restore pretend args. See arm_expand_prologue for how the pretend
25056 args are saved on the stack. */
25057 int num_regs = crtl->args.pretend_args_size / 4;
25058 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25059 for (j = 0, i = 0; j < num_regs; i++)
25060 if (saved_regs_mask & (1 << i))
25062 rtx reg = gen_rtx_REG (SImode, i);
25063 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25064 j++;
25066 REG_NOTES (tmp) = dwarf;
25068 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25069 stack_pointer_rtx, stack_pointer_rtx);
25072 if (!really_return)
25073 return;
25075 if (crtl->calls_eh_return)
25076 emit_insn (gen_addsi3 (stack_pointer_rtx,
25077 stack_pointer_rtx,
25078 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25080 if (IS_STACKALIGN (func_type))
25081 /* Restore the original stack pointer. Before prologue, the stack was
25082 realigned and the original stack pointer saved in r0. For details,
25083 see comment in arm_expand_prologue. */
25084 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25086 emit_jump_insn (simple_return_rtx);
25089 /* Implementation of insn prologue_thumb1_interwork. This is the first
25090 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25092 const char *
25093 thumb1_output_interwork (void)
25095 const char * name;
25096 FILE *f = asm_out_file;
25098 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25099 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25100 == SYMBOL_REF);
25101 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25103 /* Generate code sequence to switch us into Thumb mode. */
25104 /* The .code 32 directive has already been emitted by
25105 ASM_DECLARE_FUNCTION_NAME. */
25106 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25107 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25109 /* Generate a label, so that the debugger will notice the
25110 change in instruction sets. This label is also used by
25111 the assembler to bypass the ARM code when this function
25112 is called from a Thumb encoded function elsewhere in the
25113 same file. Hence the definition of STUB_NAME here must
25114 agree with the definition in gas/config/tc-arm.c. */
25116 #define STUB_NAME ".real_start_of"
25118 fprintf (f, "\t.code\t16\n");
25119 #ifdef ARM_PE
25120 if (arm_dllexport_name_p (name))
25121 name = arm_strip_name_encoding (name);
25122 #endif
25123 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25124 fprintf (f, "\t.thumb_func\n");
25125 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25127 return "";
25130 /* Handle the case of a double word load into a low register from
25131 a computed memory address. The computed address may involve a
25132 register which is overwritten by the load. */
25133 const char *
25134 thumb_load_double_from_address (rtx *operands)
25136 rtx addr;
25137 rtx base;
25138 rtx offset;
25139 rtx arg1;
25140 rtx arg2;
25142 gcc_assert (REG_P (operands[0]));
25143 gcc_assert (MEM_P (operands[1]));
25145 /* Get the memory address. */
25146 addr = XEXP (operands[1], 0);
25148 /* Work out how the memory address is computed. */
25149 switch (GET_CODE (addr))
25151 case REG:
25152 operands[2] = adjust_address (operands[1], SImode, 4);
25154 if (REGNO (operands[0]) == REGNO (addr))
25156 output_asm_insn ("ldr\t%H0, %2", operands);
25157 output_asm_insn ("ldr\t%0, %1", operands);
25159 else
25161 output_asm_insn ("ldr\t%0, %1", operands);
25162 output_asm_insn ("ldr\t%H0, %2", operands);
25164 break;
25166 case CONST:
25167 /* Compute <address> + 4 for the high order load. */
25168 operands[2] = adjust_address (operands[1], SImode, 4);
25170 output_asm_insn ("ldr\t%0, %1", operands);
25171 output_asm_insn ("ldr\t%H0, %2", operands);
25172 break;
25174 case PLUS:
25175 arg1 = XEXP (addr, 0);
25176 arg2 = XEXP (addr, 1);
25178 if (CONSTANT_P (arg1))
25179 base = arg2, offset = arg1;
25180 else
25181 base = arg1, offset = arg2;
25183 gcc_assert (REG_P (base));
25185 /* Catch the case of <address> = <reg> + <reg> */
25186 if (REG_P (offset))
25188 int reg_offset = REGNO (offset);
25189 int reg_base = REGNO (base);
25190 int reg_dest = REGNO (operands[0]);
25192 /* Add the base and offset registers together into the
25193 higher destination register. */
25194 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25195 reg_dest + 1, reg_base, reg_offset);
25197 /* Load the lower destination register from the address in
25198 the higher destination register. */
25199 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25200 reg_dest, reg_dest + 1);
25202 /* Load the higher destination register from its own address
25203 plus 4. */
25204 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25205 reg_dest + 1, reg_dest + 1);
25207 else
25209 /* Compute <address> + 4 for the high order load. */
25210 operands[2] = adjust_address (operands[1], SImode, 4);
25212 /* If the computed address is held in the low order register
25213 then load the high order register first, otherwise always
25214 load the low order register first. */
25215 if (REGNO (operands[0]) == REGNO (base))
25217 output_asm_insn ("ldr\t%H0, %2", operands);
25218 output_asm_insn ("ldr\t%0, %1", operands);
25220 else
25222 output_asm_insn ("ldr\t%0, %1", operands);
25223 output_asm_insn ("ldr\t%H0, %2", operands);
25226 break;
25228 case LABEL_REF:
25229 /* With no registers to worry about we can just load the value
25230 directly. */
25231 operands[2] = adjust_address (operands[1], SImode, 4);
25233 output_asm_insn ("ldr\t%H0, %2", operands);
25234 output_asm_insn ("ldr\t%0, %1", operands);
25235 break;
25237 default:
25238 gcc_unreachable ();
25241 return "";
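/* For example, loading a doubleword at the address in r0 into r0/r1 emits
	ldr	r1, [r0, #4]
	ldr	r0, [r0]
   so the word that overwrites the address register is loaded last.  */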
25244 const char *
25245 thumb_output_move_mem_multiple (int n, rtx *operands)
25247 rtx tmp;
25249 switch (n)
25251 case 2:
25252 if (REGNO (operands[4]) > REGNO (operands[5]))
25254 tmp = operands[4];
25255 operands[4] = operands[5];
25256 operands[5] = tmp;
25258 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25259 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25260 break;
25262 case 3:
25263 if (REGNO (operands[4]) > REGNO (operands[5]))
25264 std::swap (operands[4], operands[5]);
25265 if (REGNO (operands[5]) > REGNO (operands[6]))
25266 std::swap (operands[5], operands[6]);
25267 if (REGNO (operands[4]) > REGNO (operands[5]))
25268 std::swap (operands[4], operands[5]);
25270 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25271 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25272 break;
25274 default:
25275 gcc_unreachable ();
25278 return "";
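/* Note (illustrative, not part of the original sources): ldmia/stmia register
   lists must be in ascending register order, which is why the operands are
   sorted above; the three-register case is a small bubble sort.  For example,
   if operands[4..6] arrive as r5, r3, r4 the emitted instructions use the
   list {r3, r4, r5}.  */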
25281 /* Output a call-via instruction for thumb state. */
25282 const char *
25283 thumb_call_via_reg (rtx reg)
25285 int regno = REGNO (reg);
25286 rtx *labelp;
25288 gcc_assert (regno < LR_REGNUM);
25290 /* If we are in the normal text section we can use a single instance
25291 per compilation unit. If we are doing function sections, then we need
25292 an entry per section, since we can't rely on reachability. */
25293 if (in_section == text_section)
25295 thumb_call_reg_needed = 1;
25297 if (thumb_call_via_label[regno] == NULL)
25298 thumb_call_via_label[regno] = gen_label_rtx ();
25299 labelp = thumb_call_via_label + regno;
25301 else
25303 if (cfun->machine->call_via[regno] == NULL)
25304 cfun->machine->call_via[regno] = gen_label_rtx ();
25305 labelp = cfun->machine->call_via + regno;
25308 output_asm_insn ("bl\t%a0", labelp);
25309 return "";
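/* Sketch of the resulting code (illustrative): the call site becomes a "bl"
   to a per-register internal label, and arm_file_end below (or the
   per-function-section path) emits that label followed by "bx rN", so all
   indirect calls through rN share one small trampoline.  */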
25312 /* Routines for generating rtl. */
25313 void
25314 thumb_expand_movmemqi (rtx *operands)
25316 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25317 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25318 HOST_WIDE_INT len = INTVAL (operands[2]);
25319 HOST_WIDE_INT offset = 0;
25321 while (len >= 12)
25323 emit_insn (gen_movmem12b (out, in, out, in));
25324 len -= 12;
25327 if (len >= 8)
25329 emit_insn (gen_movmem8b (out, in, out, in));
25330 len -= 8;
25333 if (len >= 4)
25335 rtx reg = gen_reg_rtx (SImode);
25336 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25337 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25338 len -= 4;
25339 offset += 4;
25342 if (len >= 2)
25344 rtx reg = gen_reg_rtx (HImode);
25345 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25346 plus_constant (Pmode, in,
25347 offset))));
25348 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25349 offset)),
25350 reg));
25351 len -= 2;
25352 offset += 2;
25355 if (len)
25357 rtx reg = gen_reg_rtx (QImode);
25358 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25359 plus_constant (Pmode, in,
25360 offset))));
25361 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25362 offset)),
25363 reg));
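/* Example of the decomposition performed above (illustrative only): a copy
   of 23 bytes is expanded as one 12-byte block move, one 8-byte block move,
   then a 2-byte and a 1-byte tail copy.  */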
25367 void
25368 thumb_reload_out_hi (rtx *operands)
25370 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25373 /* Handle reading a half-word from memory during reload. */
25374 void
25375 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25377 gcc_unreachable ();
25380 /* Return the length of a function name prefix
25381 that starts with the character 'c'. */
25382 static int
25383 arm_get_strip_length (int c)
25385 switch (c)
25387 ARM_NAME_ENCODING_LENGTHS
25388 default: return 0;
25392 /* Return a pointer to a function's name with any
25393 and all prefix encodings stripped from it. */
25394 const char *
25395 arm_strip_name_encoding (const char *name)
25397 int skip;
25399 while ((skip = arm_get_strip_length (* name)))
25400 name += skip;
25402 return name;
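/* For instance, with the usual ARM_NAME_ENCODING_LENGTHS definition a name
   such as "*foo" is returned as "foo": the '*' prefix has length 1 and is
   stripped (illustrative example, assuming the default encodings).  */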
25405 /* If there is a '*' anywhere in the name's prefix, then
25406 emit the stripped name verbatim, otherwise prepend an
25407 underscore if leading underscores are being used. */
25408 void
25409 arm_asm_output_labelref (FILE *stream, const char *name)
25411 int skip;
25412 int verbatim = 0;
25414 while ((skip = arm_get_strip_length (* name)))
25416 verbatim |= (*name == '*');
25417 name += skip;
25420 if (verbatim)
25421 fputs (name, stream);
25422 else
25423 asm_fprintf (stream, "%U%s", name);
25426 /* This function is used to emit an EABI tag and its associated value.
25427 We emit the numerical value of the tag in case the assembler does not
25428 support textual tags (e.g. gas prior to 2.20). If requested we include
25429 the tag name in a comment so that anyone reading the assembler output
25430 will know which tag is being set.
25432 This function is not static because arm-c.c needs it too. */
25434 void
25435 arm_emit_eabi_attribute (const char *name, int num, int val)
25437 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25438 if (flag_verbose_asm || flag_debug_asm)
25439 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25440 asm_fprintf (asm_out_file, "\n");
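/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) emits
       .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
   when -fverbose-asm is in effect; the trailing comment uses whatever
   ASM_COMMENT_START expands to.  */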
25443 /* This function is used to print CPU tuning information as comment
25444 in assembler file. Pointers are not printed for now. */
25446 void
25447 arm_print_tune_info (void)
25449 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25450 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25451 current_tune->constant_limit);
25452 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25453 current_tune->max_insns_skipped);
25454 asm_fprintf (asm_out_file, "\t\t@num_prefetch_slots:\t%d\n",
25455 current_tune->num_prefetch_slots);
25456 asm_fprintf (asm_out_file, "\t\t@l1_cache_size:\t%d\n",
25457 current_tune->l1_cache_size);
25458 asm_fprintf (asm_out_file, "\t\t@l1_cache_line_size:\t%d\n",
25459 current_tune->l1_cache_line_size);
25460 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25461 (int) current_tune->prefer_constant_pool);
25462 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25463 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25464 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25465 current_tune->branch_cost (false, false));
25466 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25467 current_tune->branch_cost (false, true));
25468 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25469 current_tune->branch_cost (true, false));
25470 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25471 current_tune->branch_cost (true, true));
25472 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25473 (int) current_tune->prefer_ldrd_strd);
25474 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25475 (int) current_tune->logical_op_non_short_circuit[0],
25476 (int) current_tune->logical_op_non_short_circuit[1]);
25477 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25478 (int) current_tune->prefer_neon_for_64bits);
25479 asm_fprintf (asm_out_file,
25480 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25481 (int) current_tune->disparage_flag_setting_t16_encodings);
25482 asm_fprintf (asm_out_file,
25483 "\t\t@disparage_partial_flag_setting_t16_encodings:\t%d\n",
25484 (int) current_tune
25485 ->disparage_partial_flag_setting_t16_encodings);
25486 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25487 (int) current_tune->string_ops_prefer_neon);
25488 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25489 current_tune->max_insns_inline_memset);
25490 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25491 current_tune->fuseable_ops);
25492 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25493 (int) current_tune->sched_autopref);
25496 static void
25497 arm_file_start (void)
25499 int val;
25501 if (TARGET_UNIFIED_ASM)
25502 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25504 if (TARGET_BPABI)
25506 const char *fpu_name;
25507 if (arm_selected_arch)
25509 /* armv7ve doesn't support any extensions. */
25510 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25512 /* Keep backward compatibility for assemblers
25513 which don't support armv7ve. */
25514 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25515 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25516 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25517 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25518 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25520 else
25522 const char* pos = strchr (arm_selected_arch->name, '+');
25523 if (pos)
25525 char buf[15];
25526 gcc_assert (strlen (arm_selected_arch->name)
25527 <= sizeof (buf) / sizeof (*pos));
25528 strncpy (buf, arm_selected_arch->name,
25529 (pos - arm_selected_arch->name) * sizeof (*pos));
25530 buf[pos - arm_selected_arch->name] = '\0';
25531 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25532 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25534 else
25535 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25538 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25539 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25540 else
25542 const char* truncated_name
25543 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25544 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25547 if (print_tune_info)
25548 arm_print_tune_info ();
25550 if (TARGET_SOFT_FLOAT)
25552 fpu_name = "softvfp";
25554 else
25556 fpu_name = arm_fpu_desc->name;
25557 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25559 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25560 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25562 if (TARGET_HARD_FLOAT_ABI)
25563 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25566 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25568 /* Some of these attributes only apply when the corresponding features
25569 are used. However we don't have any easy way of figuring this out.
25570 Conservatively record the setting that would have been used. */
25572 if (flag_rounding_math)
25573 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25575 if (!flag_unsafe_math_optimizations)
25577 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25578 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25580 if (flag_signaling_nans)
25581 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25583 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25584 flag_finite_math_only ? 1 : 3);
25586 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25587 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25588 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25589 flag_short_enums ? 1 : 2);
25591 /* Tag_ABI_optimization_goals. */
25592 if (optimize_size)
25593 val = 4;
25594 else if (optimize >= 2)
25595 val = 2;
25596 else if (optimize)
25597 val = 1;
25598 else
25599 val = 6;
25600 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
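/* So -Os records 4, -O2/-O3 record 2, -O1 records 1 and -O0 records 6;
   these are assumed to correspond to the EABI's "aggressive size",
   "aggressive speed", "speed" and "debugging" categories respectively.  */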
25602 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25603 unaligned_access);
25605 if (arm_fp16_format)
25606 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25607 (int) arm_fp16_format);
25609 if (arm_lang_output_object_attributes_hook)
25610 arm_lang_output_object_attributes_hook();
25613 default_file_start ();
25616 static void
25617 arm_file_end (void)
25619 int regno;
25621 if (NEED_INDICATE_EXEC_STACK)
25622 /* Add .note.GNU-stack. */
25623 file_end_indicate_exec_stack ();
25625 if (! thumb_call_reg_needed)
25626 return;
25628 switch_to_section (text_section);
25629 asm_fprintf (asm_out_file, "\t.code 16\n");
25630 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25632 for (regno = 0; regno < LR_REGNUM; regno++)
25634 rtx label = thumb_call_via_label[regno];
25636 if (label != 0)
25638 targetm.asm_out.internal_label (asm_out_file, "L",
25639 CODE_LABEL_NUMBER (label));
25640 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25645 #ifndef ARM_PE
25646 /* Symbols in the text segment can be accessed without indirecting via the
25647 constant pool; it may take an extra binary operation, but this is still
25648 faster than indirecting via memory. Don't do this when not optimizing,
25649 since we won't be calculating all of the offsets necessary to do this
25650 simplification. */
25652 static void
25653 arm_encode_section_info (tree decl, rtx rtl, int first)
25655 if (optimize > 0 && TREE_CONSTANT (decl))
25656 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25658 default_encode_section_info (decl, rtl, first);
25660 #endif /* !ARM_PE */
25662 static void
25663 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25665 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25666 && !strcmp (prefix, "L"))
25668 arm_ccfsm_state = 0;
25669 arm_target_insn = NULL;
25671 default_internal_label (stream, prefix, labelno);
25674 /* Output code to add DELTA to the first argument, and then jump
25675 to FUNCTION. Used for C++ multiple inheritance. */
25676 static void
25677 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25678 HOST_WIDE_INT delta,
25679 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25680 tree function)
25682 static int thunk_label = 0;
25683 char label[256];
25684 char labelpc[256];
25685 int mi_delta = delta;
25686 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25687 int shift = 0;
25688 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25689 ? 1 : 0);
25690 if (mi_delta < 0)
25691 mi_delta = - mi_delta;
25693 final_start_function (emit_barrier (), file, 1);
25695 if (TARGET_THUMB1)
25697 int labelno = thunk_label++;
25698 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25699 /* Thunks are entered in ARM mode when available. */
25700 if (TARGET_THUMB1_ONLY)
25702 /* push r3 so we can use it as a temporary. */
25703 /* TODO: Omit this save if r3 is not used. */
25704 fputs ("\tpush {r3}\n", file);
25705 fputs ("\tldr\tr3, ", file);
25707 else
25709 fputs ("\tldr\tr12, ", file);
25711 assemble_name (file, label);
25712 fputc ('\n', file);
25713 if (flag_pic)
25715 /* If we are generating PIC, the ldr instruction below loads
25716 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25717 the address of the add + 8, so we have:
25719 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25720 = target + 1.
25722 Note that we have "+ 1" because some versions of GNU ld
25723 don't set the low bit of the result for R_ARM_REL32
25724 relocations against thumb function symbols.
25725 On ARMv6M this is +4, not +8. */
25726 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25727 assemble_name (file, labelpc);
25728 fputs (":\n", file);
25729 if (TARGET_THUMB1_ONLY)
25731 /* This is 2 insns after the start of the thunk, so we know it
25732 is 4-byte aligned. */
25733 fputs ("\tadd\tr3, pc, r3\n", file);
25734 fputs ("\tmov r12, r3\n", file);
25736 else
25737 fputs ("\tadd\tr12, pc, r12\n", file);
25739 else if (TARGET_THUMB1_ONLY)
25740 fputs ("\tmov r12, r3\n", file);
25742 if (TARGET_THUMB1_ONLY)
25744 if (mi_delta > 255)
25746 fputs ("\tldr\tr3, ", file);
25747 assemble_name (file, label);
25748 fputs ("+4\n", file);
25749 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25750 mi_op, this_regno, this_regno);
25752 else if (mi_delta != 0)
25754 /* Thumb1 unified syntax requires s suffix in instruction name when
25755 one of the operands is immediate. */
25756 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25757 mi_op, this_regno, this_regno,
25758 mi_delta);
25761 else
25763 /* TODO: Use movw/movt for large constants when available. */
25764 while (mi_delta != 0)
25766 if ((mi_delta & (3 << shift)) == 0)
25767 shift += 2;
25768 else
25770 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25771 mi_op, this_regno, this_regno,
25772 mi_delta & (0xff << shift));
25773 mi_delta &= ~(0xff << shift);
25774 shift += 8;
25778 if (TARGET_THUMB1)
25780 if (TARGET_THUMB1_ONLY)
25781 fputs ("\tpop\t{r3}\n", file);
25783 fprintf (file, "\tbx\tr12\n");
25784 ASM_OUTPUT_ALIGN (file, 2);
25785 assemble_name (file, label);
25786 fputs (":\n", file);
25787 if (flag_pic)
25789 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25790 rtx tem = XEXP (DECL_RTL (function), 0);
25791 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25792 pipeline offset is four rather than eight. Adjust the offset
25793 accordingly. */
25794 tem = plus_constant (GET_MODE (tem), tem,
25795 TARGET_THUMB1_ONLY ? -3 : -7);
25796 tem = gen_rtx_MINUS (GET_MODE (tem),
25797 tem,
25798 gen_rtx_SYMBOL_REF (Pmode,
25799 ggc_strdup (labelpc)));
25800 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25802 else
25803 /* Output ".word .LTHUNKn". */
25804 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25806 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25807 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25809 else
25811 fputs ("\tb\t", file);
25812 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25813 if (NEED_PLT_RELOC)
25814 fputs ("(PLT)", file);
25815 fputc ('\n', file);
25818 final_end_function ();
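/* Sketch of the code produced for a simple non-Thumb-1 thunk (assumed
   operand values): with delta = 4 and no vcall offset the thunk is just
       add  r0, r0, #4
       b    <function>(PLT)
   where r0 is replaced by r1 when the function returns an aggregate in
   memory, and the (PLT) suffix appears only when PLT relocations are
   needed.  */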
25822 arm_emit_vector_const (FILE *file, rtx x)
25824 int i;
25825 const char * pattern;
25827 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25829 switch (GET_MODE (x))
25831 case V2SImode: pattern = "%08x"; break;
25832 case V4HImode: pattern = "%04x"; break;
25833 case V8QImode: pattern = "%02x"; break;
25834 default: gcc_unreachable ();
25837 fprintf (file, "0x");
25838 for (i = CONST_VECTOR_NUNITS (x); i--;)
25840 rtx element;
25842 element = CONST_VECTOR_ELT (x, i);
25843 fprintf (file, pattern, INTVAL (element));
25846 return 1;
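/* Illustrative output: a V4HImode constant with elements {1, 2, 3, 4}
   (element 0 first) is printed as 0x0004000300020001, i.e. the highest
   numbered element comes first in the hex string.  */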
25849 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25850 HFmode constant pool entries are actually loaded with ldr. */
25851 void
25852 arm_emit_fp16_const (rtx c)
25854 REAL_VALUE_TYPE r;
25855 long bits;
25857 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25858 bits = real_to_target (NULL, &r, HFmode);
25859 if (WORDS_BIG_ENDIAN)
25860 assemble_zeros (2);
25861 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25862 if (!WORDS_BIG_ENDIAN)
25863 assemble_zeros (2);
25866 const char *
25867 arm_output_load_gr (rtx *operands)
25869 rtx reg;
25870 rtx offset;
25871 rtx wcgr;
25872 rtx sum;
25874 if (!MEM_P (operands [1])
25875 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25876 || !REG_P (reg = XEXP (sum, 0))
25877 || !CONST_INT_P (offset = XEXP (sum, 1))
25878 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25879 return "wldrw%?\t%0, %1";
25881 /* Fix up an out-of-range load of a GR register. */
25882 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25883 wcgr = operands[0];
25884 operands[0] = reg;
25885 output_asm_insn ("ldr%?\t%0, %1", operands);
25887 operands[0] = wcgr;
25888 operands[1] = reg;
25889 output_asm_insn ("tmcr%?\t%0, %1", operands);
25890 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25892 return "";
25895 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25897 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25898 named arg and all anonymous args onto the stack.
25899 XXX I know the prologue shouldn't be pushing registers, but it is faster
25900 that way. */
25902 static void
25903 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25904 machine_mode mode,
25905 tree type,
25906 int *pretend_size,
25907 int second_time ATTRIBUTE_UNUSED)
25909 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25910 int nregs;
25912 cfun->machine->uses_anonymous_args = 1;
25913 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25915 nregs = pcum->aapcs_ncrn;
25916 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25917 nregs++;
25919 else
25920 nregs = pcum->nregs;
25922 if (nregs < NUM_ARG_REGS)
25923 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
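/* Example (assuming the usual four core argument registers): for
   "int f (int a, ...)" one register is consumed by the named argument, so
   pretend_size becomes 3 * UNITS_PER_WORD and the prologue pushes r1-r3.  */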
25926 /* We can't rely on the caller doing the proper promotion when
25927 using APCS or ATPCS. */
25929 static bool
25930 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25932 return !TARGET_AAPCS_BASED;
25935 static machine_mode
25936 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25937 machine_mode mode,
25938 int *punsignedp ATTRIBUTE_UNUSED,
25939 const_tree fntype ATTRIBUTE_UNUSED,
25940 int for_return ATTRIBUTE_UNUSED)
25942 if (GET_MODE_CLASS (mode) == MODE_INT
25943 && GET_MODE_SIZE (mode) < 4)
25944 return SImode;
25946 return mode;
25949 /* AAPCS based ABIs use short enums by default. */
25951 static bool
25952 arm_default_short_enums (void)
25954 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25958 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25960 static bool
25961 arm_align_anon_bitfield (void)
25963 return TARGET_AAPCS_BASED;
25967 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25969 static tree
25970 arm_cxx_guard_type (void)
25972 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25976 /* The EABI says test the least significant bit of a guard variable. */
25978 static bool
25979 arm_cxx_guard_mask_bit (void)
25981 return TARGET_AAPCS_BASED;
25985 /* The EABI specifies that all array cookies are 8 bytes long. */
25987 static tree
25988 arm_get_cookie_size (tree type)
25990 tree size;
25992 if (!TARGET_AAPCS_BASED)
25993 return default_cxx_get_cookie_size (type);
25995 size = build_int_cst (sizetype, 8);
25996 return size;
26000 /* The EABI says that array cookies should also contain the element size. */
26002 static bool
26003 arm_cookie_has_size (void)
26005 return TARGET_AAPCS_BASED;
26009 /* The EABI says constructors and destructors should return a pointer to
26010 the object constructed/destroyed. */
26012 static bool
26013 arm_cxx_cdtor_returns_this (void)
26015 return TARGET_AAPCS_BASED;
26018 /* The EABI says that an inline function may never be the key
26019 method. */
26021 static bool
26022 arm_cxx_key_method_may_be_inline (void)
26024 return !TARGET_AAPCS_BASED;
26027 static void
26028 arm_cxx_determine_class_data_visibility (tree decl)
26030 if (!TARGET_AAPCS_BASED
26031 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26032 return;
26034 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26035 is exported. However, on systems without dynamic vague linkage,
26036 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26037 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26038 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26039 else
26040 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26041 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26044 static bool
26045 arm_cxx_class_data_always_comdat (void)
26047 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26048 vague linkage if the class has no key function. */
26049 return !TARGET_AAPCS_BASED;
26053 /* The EABI says __aeabi_atexit should be used to register static
26054 destructors. */
26056 static bool
26057 arm_cxx_use_aeabi_atexit (void)
26059 return TARGET_AAPCS_BASED;
26063 void
26064 arm_set_return_address (rtx source, rtx scratch)
26066 arm_stack_offsets *offsets;
26067 HOST_WIDE_INT delta;
26068 rtx addr;
26069 unsigned long saved_regs;
26071 offsets = arm_get_frame_offsets ();
26072 saved_regs = offsets->saved_regs_mask;
26074 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26075 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26076 else
26078 if (frame_pointer_needed)
26079 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26080 else
26082 /* LR will be the first saved register. */
26083 delta = offsets->outgoing_args - (offsets->frame + 4);
26086 if (delta >= 4096)
26088 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26089 GEN_INT (delta & ~4095)));
26090 addr = scratch;
26091 delta &= 4095;
26093 else
26094 addr = stack_pointer_rtx;
26096 addr = plus_constant (Pmode, addr, delta);
26098 /* The store needs to be marked as frame related in order to prevent
26099 DSE from deleting it as dead if it is based on fp. */
26100 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26101 RTX_FRAME_RELATED_P (insn) = 1;
26102 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26107 void
26108 thumb_set_return_address (rtx source, rtx scratch)
26110 arm_stack_offsets *offsets;
26111 HOST_WIDE_INT delta;
26112 HOST_WIDE_INT limit;
26113 int reg;
26114 rtx addr;
26115 unsigned long mask;
26117 emit_use (source);
26119 offsets = arm_get_frame_offsets ();
26120 mask = offsets->saved_regs_mask;
26121 if (mask & (1 << LR_REGNUM))
26123 limit = 1024;
26124 /* Find the saved regs. */
26125 if (frame_pointer_needed)
26127 delta = offsets->soft_frame - offsets->saved_args;
26128 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26129 if (TARGET_THUMB1)
26130 limit = 128;
26132 else
26134 delta = offsets->outgoing_args - offsets->saved_args;
26135 reg = SP_REGNUM;
26137 /* Allow for the stack frame. */
26138 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26139 delta -= 16;
26140 /* The link register is always the first saved register. */
26141 delta -= 4;
26143 /* Construct the address. */
26144 addr = gen_rtx_REG (SImode, reg);
26145 if (delta > limit)
26147 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26148 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26149 addr = scratch;
26151 else
26152 addr = plus_constant (Pmode, addr, delta);
26154 /* The store needs to be marked as frame related in order to prevent
26155 DSE from deleting it as dead if it is based on fp. */
26156 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26157 RTX_FRAME_RELATED_P (insn) = 1;
26158 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26160 else
26161 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26164 /* Implements target hook vector_mode_supported_p. */
26165 bool
26166 arm_vector_mode_supported_p (machine_mode mode)
26168 /* Neon also supports V2SImode, etc. listed in the clause below. */
26169 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26170 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26171 return true;
26173 if ((TARGET_NEON || TARGET_IWMMXT)
26174 && ((mode == V2SImode)
26175 || (mode == V4HImode)
26176 || (mode == V8QImode)))
26177 return true;
26179 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26180 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26181 || mode == V2HAmode))
26182 return true;
26184 return false;
26187 /* Implements target hook array_mode_supported_p. */
26189 static bool
26190 arm_array_mode_supported_p (machine_mode mode,
26191 unsigned HOST_WIDE_INT nelems)
26193 if (TARGET_NEON
26194 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26195 && (nelems >= 2 && nelems <= 4))
26196 return true;
26198 return false;
26201 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26202 registers when autovectorizing for Neon, at least until multiple vector
26203 widths are supported properly by the middle-end. */
26205 static machine_mode
26206 arm_preferred_simd_mode (machine_mode mode)
26208 if (TARGET_NEON)
26209 switch (mode)
26211 case SFmode:
26212 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26213 case SImode:
26214 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26215 case HImode:
26216 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26217 case QImode:
26218 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26219 case DImode:
26220 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26221 return V2DImode;
26222 break;
26224 default:;
26227 if (TARGET_REALLY_IWMMXT)
26228 switch (mode)
26230 case SImode:
26231 return V2SImode;
26232 case HImode:
26233 return V4HImode;
26234 case QImode:
26235 return V8QImode;
26237 default:;
26240 return word_mode;
26243 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26245 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26246 using r0-r4 for function arguments, r7 for the stack frame and not have
26247 enough left over to do doubleword arithmetic. For Thumb-2 all the
26248 potentially problematic instructions accept high registers so this is not
26249 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26250 that require many low registers. */
26251 static bool
26252 arm_class_likely_spilled_p (reg_class_t rclass)
26254 if ((TARGET_THUMB1 && rclass == LO_REGS)
26255 || rclass == CC_REG)
26256 return true;
26258 return false;
26261 /* Implements target hook small_register_classes_for_mode_p. */
26262 bool
26263 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26265 return TARGET_THUMB1;
26268 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26269 ARM insns and therefore guarantee that the shift count is modulo 256.
26270 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26271 guarantee no particular behavior for out-of-range counts. */
26273 static unsigned HOST_WIDE_INT
26274 arm_shift_truncation_mask (machine_mode mode)
26276 return mode == SImode ? 255 : 0;
26280 /* Map internal gcc register numbers to DWARF2 register numbers. */
26282 unsigned int
26283 arm_dbx_register_number (unsigned int regno)
26285 if (regno < 16)
26286 return regno;
26288 if (IS_VFP_REGNUM (regno))
26290 /* See comment in arm_dwarf_register_span. */
26291 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26292 return 64 + regno - FIRST_VFP_REGNUM;
26293 else
26294 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26297 if (IS_IWMMXT_GR_REGNUM (regno))
26298 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26300 if (IS_IWMMXT_REGNUM (regno))
26301 return 112 + regno - FIRST_IWMMXT_REGNUM;
26303 gcc_unreachable ();
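/* For example, following the mapping above: s5 maps to DWARF register
   64 + 5 = 69, while d16 (which has no single-precision alias) maps to
   256 + 16 = 272.  */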
26306 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26307 GCC models them as 64 32-bit registers, so we need to describe this to
26308 the DWARF generation code. Other registers can use the default. */
26309 static rtx
26310 arm_dwarf_register_span (rtx rtl)
26312 machine_mode mode;
26313 unsigned regno;
26314 rtx parts[16];
26315 int nregs;
26316 int i;
26318 regno = REGNO (rtl);
26319 if (!IS_VFP_REGNUM (regno))
26320 return NULL_RTX;
26322 /* XXX FIXME: The EABI defines two VFP register ranges:
26323 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26324 256-287: D0-D31
26325 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26326 corresponding D register. Until GDB supports this, we shall use the
26327 legacy encodings. We also use these encodings for D0-D15 for
26328 compatibility with older debuggers. */
26329 mode = GET_MODE (rtl);
26330 if (GET_MODE_SIZE (mode) < 8)
26331 return NULL_RTX;
26333 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26335 nregs = GET_MODE_SIZE (mode) / 4;
26336 for (i = 0; i < nregs; i += 2)
26337 if (TARGET_BIG_END)
26339 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26340 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26342 else
26344 parts[i] = gen_rtx_REG (SImode, regno + i);
26345 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26348 else
26350 nregs = GET_MODE_SIZE (mode) / 8;
26351 for (i = 0; i < nregs; i++)
26352 parts[i] = gen_rtx_REG (DImode, regno + i);
26355 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26358 #if ARM_UNWIND_INFO
26359 /* Emit unwind directives for a store-multiple instruction or stack pointer
26360 push during alignment.
26361 These should only ever be generated by the function prologue code, so
26362 expect them to have a particular form.
26363 The store-multiple instruction sometimes pushes pc as the last register,
26364 although it should not be tracked into unwind information, or for -Os
26365 sometimes pushes some dummy registers before the first register that needs
26366 to be tracked in unwind information; such dummy registers are there just
26367 to avoid separate stack adjustment, and will not be restored in the
26368 epilogue. */
26370 static void
26371 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26373 int i;
26374 HOST_WIDE_INT offset;
26375 HOST_WIDE_INT nregs;
26376 int reg_size;
26377 unsigned reg;
26378 unsigned lastreg;
26379 unsigned padfirst = 0, padlast = 0;
26380 rtx e;
26382 e = XVECEXP (p, 0, 0);
26383 gcc_assert (GET_CODE (e) == SET);
26385 /* First insn will adjust the stack pointer. */
26386 gcc_assert (GET_CODE (e) == SET
26387 && REG_P (SET_DEST (e))
26388 && REGNO (SET_DEST (e)) == SP_REGNUM
26389 && GET_CODE (SET_SRC (e)) == PLUS);
26391 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26392 nregs = XVECLEN (p, 0) - 1;
26393 gcc_assert (nregs);
26395 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26396 if (reg < 16)
26398 /* For -Os dummy registers can be pushed at the beginning to
26399 avoid separate stack pointer adjustment. */
26400 e = XVECEXP (p, 0, 1);
26401 e = XEXP (SET_DEST (e), 0);
26402 if (GET_CODE (e) == PLUS)
26403 padfirst = INTVAL (XEXP (e, 1));
26404 gcc_assert (padfirst == 0 || optimize_size);
26405 /* The function prologue may also push pc, but not annotate it as it is
26406 never restored. We turn this into a stack pointer adjustment. */
26407 e = XVECEXP (p, 0, nregs);
26408 e = XEXP (SET_DEST (e), 0);
26409 if (GET_CODE (e) == PLUS)
26410 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26411 else
26412 padlast = offset - 4;
26413 gcc_assert (padlast == 0 || padlast == 4);
26414 if (padlast == 4)
26415 fprintf (asm_out_file, "\t.pad #4\n");
26416 reg_size = 4;
26417 fprintf (asm_out_file, "\t.save {");
26419 else if (IS_VFP_REGNUM (reg))
26421 reg_size = 8;
26422 fprintf (asm_out_file, "\t.vsave {");
26424 else
26425 /* Unknown register type. */
26426 gcc_unreachable ();
26428 /* If the stack increment doesn't match the size of the saved registers,
26429 something has gone horribly wrong. */
26430 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26432 offset = padfirst;
26433 lastreg = 0;
26434 /* The remaining insns will describe the stores. */
26435 for (i = 1; i <= nregs; i++)
26437 /* Expect (set (mem <addr>) (reg)).
26438 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26439 e = XVECEXP (p, 0, i);
26440 gcc_assert (GET_CODE (e) == SET
26441 && MEM_P (SET_DEST (e))
26442 && REG_P (SET_SRC (e)));
26444 reg = REGNO (SET_SRC (e));
26445 gcc_assert (reg >= lastreg);
26447 if (i != 1)
26448 fprintf (asm_out_file, ", ");
26449 /* We can't use %r for vfp because we need to use the
26450 double precision register names. */
26451 if (IS_VFP_REGNUM (reg))
26452 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26453 else
26454 asm_fprintf (asm_out_file, "%r", reg);
26456 #ifdef ENABLE_CHECKING
26457 /* Check that the addresses are consecutive. */
26458 e = XEXP (SET_DEST (e), 0);
26459 if (GET_CODE (e) == PLUS)
26460 gcc_assert (REG_P (XEXP (e, 0))
26461 && REGNO (XEXP (e, 0)) == SP_REGNUM
26462 && CONST_INT_P (XEXP (e, 1))
26463 && offset == INTVAL (XEXP (e, 1)));
26464 else
26465 gcc_assert (i == 1
26466 && REG_P (e)
26467 && REGNO (e) == SP_REGNUM);
26468 offset += reg_size;
26469 #endif
26471 fprintf (asm_out_file, "}\n");
26472 if (padfirst)
26473 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26476 /* Emit unwind directives for a SET. */
26478 static void
26479 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26481 rtx e0;
26482 rtx e1;
26483 unsigned reg;
26485 e0 = XEXP (p, 0);
26486 e1 = XEXP (p, 1);
26487 switch (GET_CODE (e0))
26489 case MEM:
26490 /* Pushing a single register. */
26491 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26492 || !REG_P (XEXP (XEXP (e0, 0), 0))
26493 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26494 abort ();
26496 asm_fprintf (asm_out_file, "\t.save ");
26497 if (IS_VFP_REGNUM (REGNO (e1)))
26498 asm_fprintf(asm_out_file, "{d%d}\n",
26499 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26500 else
26501 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26502 break;
26504 case REG:
26505 if (REGNO (e0) == SP_REGNUM)
26507 /* A stack increment. */
26508 if (GET_CODE (e1) != PLUS
26509 || !REG_P (XEXP (e1, 0))
26510 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26511 || !CONST_INT_P (XEXP (e1, 1)))
26512 abort ();
26514 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26515 -INTVAL (XEXP (e1, 1)));
26517 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26519 HOST_WIDE_INT offset;
26521 if (GET_CODE (e1) == PLUS)
26523 if (!REG_P (XEXP (e1, 0))
26524 || !CONST_INT_P (XEXP (e1, 1)))
26525 abort ();
26526 reg = REGNO (XEXP (e1, 0));
26527 offset = INTVAL (XEXP (e1, 1));
26528 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26529 HARD_FRAME_POINTER_REGNUM, reg,
26530 offset);
26532 else if (REG_P (e1))
26534 reg = REGNO (e1);
26535 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26536 HARD_FRAME_POINTER_REGNUM, reg);
26538 else
26539 abort ();
26541 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26543 /* Move from sp to reg. */
26544 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26546 else if (GET_CODE (e1) == PLUS
26547 && REG_P (XEXP (e1, 0))
26548 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26549 && CONST_INT_P (XEXP (e1, 1)))
26551 /* Set reg to offset from sp. */
26552 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26553 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26555 else
26556 abort ();
26557 break;
26559 default:
26560 abort ();
26565 /* Emit unwind directives for the given insn. */
26567 static void
26568 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26570 rtx note, pat;
26571 bool handled_one = false;
26573 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26574 return;
26576 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26577 && (TREE_NOTHROW (current_function_decl)
26578 || crtl->all_throwers_are_sibcalls))
26579 return;
26581 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26582 return;
26584 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26586 switch (REG_NOTE_KIND (note))
26588 case REG_FRAME_RELATED_EXPR:
26589 pat = XEXP (note, 0);
26590 goto found;
26592 case REG_CFA_REGISTER:
26593 pat = XEXP (note, 0);
26594 if (pat == NULL)
26596 pat = PATTERN (insn);
26597 if (GET_CODE (pat) == PARALLEL)
26598 pat = XVECEXP (pat, 0, 0);
26601 /* Only emitted for IS_STACKALIGN re-alignment. */
26603 rtx dest, src;
26604 unsigned reg;
26606 src = SET_SRC (pat);
26607 dest = SET_DEST (pat);
26609 gcc_assert (src == stack_pointer_rtx);
26610 reg = REGNO (dest);
26611 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26612 reg + 0x90, reg);
26614 handled_one = true;
26615 break;
26617 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26618 to get correct dwarf information for shrink-wrap. We should not
26619 emit unwind information for it because these are used either for
26620 pretend arguments or notes to adjust sp and restore registers from
26621 stack. */
26622 case REG_CFA_DEF_CFA:
26623 case REG_CFA_ADJUST_CFA:
26624 case REG_CFA_RESTORE:
26625 return;
26627 case REG_CFA_EXPRESSION:
26628 case REG_CFA_OFFSET:
26629 /* ??? Only handling here what we actually emit. */
26630 gcc_unreachable ();
26632 default:
26633 break;
26636 if (handled_one)
26637 return;
26638 pat = PATTERN (insn);
26639 found:
26641 switch (GET_CODE (pat))
26643 case SET:
26644 arm_unwind_emit_set (asm_out_file, pat);
26645 break;
26647 case SEQUENCE:
26648 /* Store multiple. */
26649 arm_unwind_emit_sequence (asm_out_file, pat);
26650 break;
26652 default:
26653 abort();
26658 /* Output a reference from a function exception table to the type_info
26659 object X. The EABI specifies that the symbol should be relocated by
26660 an R_ARM_TARGET2 relocation. */
26662 static bool
26663 arm_output_ttype (rtx x)
26665 fputs ("\t.word\t", asm_out_file);
26666 output_addr_const (asm_out_file, x);
26667 /* Use special relocations for symbol references. */
26668 if (!CONST_INT_P (x))
26669 fputs ("(TARGET2)", asm_out_file);
26670 fputc ('\n', asm_out_file);
26672 return TRUE;
26675 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26677 static void
26678 arm_asm_emit_except_personality (rtx personality)
26680 fputs ("\t.personality\t", asm_out_file);
26681 output_addr_const (asm_out_file, personality);
26682 fputc ('\n', asm_out_file);
26685 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26687 static void
26688 arm_asm_init_sections (void)
26690 exception_section = get_unnamed_section (0, output_section_asm_op,
26691 "\t.handlerdata");
26693 #endif /* ARM_UNWIND_INFO */
26695 /* Output unwind directives for the start/end of a function. */
26697 void
26698 arm_output_fn_unwind (FILE * f, bool prologue)
26700 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26701 return;
26703 if (prologue)
26704 fputs ("\t.fnstart\n", f);
26705 else
26707 /* If this function will never be unwound, then mark it as such.
26708 The same condition is used in arm_unwind_emit to suppress
26709 the frame annotations. */
26710 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26711 && (TREE_NOTHROW (current_function_decl)
26712 || crtl->all_throwers_are_sibcalls))
26713 fputs("\t.cantunwind\n", f);
26715 fputs ("\t.fnend\n", f);
26719 static bool
26720 arm_emit_tls_decoration (FILE *fp, rtx x)
26722 enum tls_reloc reloc;
26723 rtx val;
26725 val = XVECEXP (x, 0, 0);
26726 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26728 output_addr_const (fp, val);
26730 switch (reloc)
26732 case TLS_GD32:
26733 fputs ("(tlsgd)", fp);
26734 break;
26735 case TLS_LDM32:
26736 fputs ("(tlsldm)", fp);
26737 break;
26738 case TLS_LDO32:
26739 fputs ("(tlsldo)", fp);
26740 break;
26741 case TLS_IE32:
26742 fputs ("(gottpoff)", fp);
26743 break;
26744 case TLS_LE32:
26745 fputs ("(tpoff)", fp);
26746 break;
26747 case TLS_DESCSEQ:
26748 fputs ("(tlsdesc)", fp);
26749 break;
26750 default:
26751 gcc_unreachable ();
26754 switch (reloc)
26756 case TLS_GD32:
26757 case TLS_LDM32:
26758 case TLS_IE32:
26759 case TLS_DESCSEQ:
26760 fputs (" + (. - ", fp);
26761 output_addr_const (fp, XVECEXP (x, 0, 2));
26762 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26763 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26764 output_addr_const (fp, XVECEXP (x, 0, 3));
26765 fputc (')', fp);
26766 break;
26767 default:
26768 break;
26771 return TRUE;
26774 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26776 static void
26777 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26779 gcc_assert (size == 4);
26780 fputs ("\t.word\t", file);
26781 output_addr_const (file, x);
26782 fputs ("(tlsldo)", file);
26785 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26787 static bool
26788 arm_output_addr_const_extra (FILE *fp, rtx x)
26790 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26791 return arm_emit_tls_decoration (fp, x);
26792 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26794 char label[256];
26795 int labelno = INTVAL (XVECEXP (x, 0, 0));
26797 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26798 assemble_name_raw (fp, label);
26800 return TRUE;
26802 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26804 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26805 if (GOT_PCREL)
26806 fputs ("+.", fp);
26807 fputs ("-(", fp);
26808 output_addr_const (fp, XVECEXP (x, 0, 0));
26809 fputc (')', fp);
26810 return TRUE;
26812 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26814 output_addr_const (fp, XVECEXP (x, 0, 0));
26815 if (GOT_PCREL)
26816 fputs ("+.", fp);
26817 fputs ("-(", fp);
26818 output_addr_const (fp, XVECEXP (x, 0, 1));
26819 fputc (')', fp);
26820 return TRUE;
26822 else if (GET_CODE (x) == CONST_VECTOR)
26823 return arm_emit_vector_const (fp, x);
26825 return FALSE;
26828 /* Output assembly for a shift instruction.
26829 SET_FLAGS determines how the instruction modifies the condition codes.
26830 0 - Do not set condition codes.
26831 1 - Set condition codes.
26832 2 - Use smallest instruction. */
26833 const char *
26834 arm_output_shift(rtx * operands, int set_flags)
26836 char pattern[100];
26837 static const char flag_chars[3] = {'?', '.', '!'};
26838 const char *shift;
26839 HOST_WIDE_INT val;
26840 char c;
26842 c = flag_chars[set_flags];
26843 if (TARGET_UNIFIED_ASM)
26845 shift = shift_op(operands[3], &val);
26846 if (shift)
26848 if (val != -1)
26849 operands[2] = GEN_INT(val);
26850 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26852 else
26853 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26855 else
26856 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26857 output_asm_insn (pattern, operands);
26858 return "";
26861 /* Output assembly for a WMMX immediate shift instruction. */
26862 const char *
26863 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26865 int shift = INTVAL (operands[2]);
26866 char templ[50];
26867 machine_mode opmode = GET_MODE (operands[0]);
26869 gcc_assert (shift >= 0);
26871 /* Handle shift values that exceed the maximum for the operand size:
26872 greater than 63 for the D qualifier, 31 for W, or 15 for H. */
26873 if (((opmode == V4HImode) && (shift > 15))
26874 || ((opmode == V2SImode) && (shift > 31))
26875 || ((opmode == DImode) && (shift > 63)))
26877 if (wror_or_wsra)
26879 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26880 output_asm_insn (templ, operands);
26881 if (opmode == DImode)
26883 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26884 output_asm_insn (templ, operands);
26887 else
26889 /* The destination register will contain all zeros. */
26890 sprintf (templ, "wzero\t%%0");
26891 output_asm_insn (templ, operands);
26893 return "";
26896 if ((opmode == DImode) && (shift > 32))
26898 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26899 output_asm_insn (templ, operands);
26900 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26901 output_asm_insn (templ, operands);
26903 else
26905 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26906 output_asm_insn (templ, operands);
26908 return "";
26911 /* Output assembly for a WMMX tinsr instruction. */
26912 const char *
26913 arm_output_iwmmxt_tinsr (rtx *operands)
26915 int mask = INTVAL (operands[3]);
26916 int i;
26917 char templ[50];
26918 int units = mode_nunits[GET_MODE (operands[0])];
26919 gcc_assert ((mask & (mask - 1)) == 0);
26920 for (i = 0; i < units; ++i)
26922 if ((mask & 0x01) == 1)
26924 break;
26926 mask >>= 1;
26928 gcc_assert (i < units);
26930 switch (GET_MODE (operands[0]))
26932 case V8QImode:
26933 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26934 break;
26935 case V4HImode:
26936 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26937 break;
26938 case V2SImode:
26939 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26940 break;
26941 default:
26942 gcc_unreachable ();
26943 break;
26945 output_asm_insn (templ, operands);
26947 return "";
26950 /* Output a Thumb-1 casesi dispatch sequence. */
26951 const char *
26952 thumb1_output_casesi (rtx *operands)
26954 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26956 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26958 switch (GET_MODE(diff_vec))
26960 case QImode:
26961 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26962 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26963 case HImode:
26964 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26965 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26966 case SImode:
26967 return "bl\t%___gnu_thumb1_case_si";
26968 default:
26969 gcc_unreachable ();
26973 /* Output a Thumb-2 casesi instruction. */
26974 const char *
26975 thumb2_output_casesi (rtx *operands)
26977 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26979 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26981 output_asm_insn ("cmp\t%0, %1", operands);
26982 output_asm_insn ("bhi\t%l3", operands);
26983 switch (GET_MODE(diff_vec))
26985 case QImode:
26986 return "tbb\t[%|pc, %0]";
26987 case HImode:
26988 return "tbh\t[%|pc, %0, lsl #1]";
26989 case SImode:
26990 if (flag_pic)
26992 output_asm_insn ("adr\t%4, %l2", operands);
26993 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
26994 output_asm_insn ("add\t%4, %4, %5", operands);
26995 return "bx\t%4";
26997 else
26999 output_asm_insn ("adr\t%4, %l2", operands);
27000 return "ldr\t%|pc, [%4, %0, lsl #2]";
27002 default:
27003 gcc_unreachable ();
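/* Illustrative expansion for a QImode dispatch table (operand numbering as
   in the casesi pattern):
       cmp   r0, r1
       bhi   .Ldefault
       tbb   [pc, r0]
   with tbh used for HImode tables and an adr/ldr sequence for SImode.  */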
27007 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27008 per-core tuning structs. */
27009 static int
27010 arm_issue_rate (void)
27012 return current_tune->issue_rate;
27015 /* Return how many instructions the scheduler should look ahead to choose the
27016 best one. */
27017 static int
27018 arm_first_cycle_multipass_dfa_lookahead (void)
27020 int issue_rate = arm_issue_rate ();
27022 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27025 /* Enable modeling of L2 auto-prefetcher. */
27026 static int
27027 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27029 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27032 const char *
27033 arm_mangle_type (const_tree type)
27035 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27036 has to be mangled as if it is in the "std" namespace. */
27037 if (TARGET_AAPCS_BASED
27038 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27039 return "St9__va_list";
27041 /* Half-precision float. */
27042 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27043 return "Dh";
27045 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27046 builtin type. */
27047 if (TYPE_NAME (type) != NULL)
27048 return arm_mangle_builtin_type (type);
27050 /* Use the default mangling. */
27051 return NULL;
27054 /* Order of allocation of core registers for Thumb: this allocation is
27055 written over the corresponding initial entries of the array
27056 initialized with REG_ALLOC_ORDER. We allocate all low registers
27057 first. Saving and restoring a low register is usually cheaper than
27058 using a call-clobbered high register. */
27060 static const int thumb_core_reg_alloc_order[] =
27062 3, 2, 1, 0, 4, 5, 6, 7,
27063 14, 12, 8, 9, 10, 11
27066 /* Adjust register allocation order when compiling for Thumb. */
27068 void
27069 arm_order_regs_for_local_alloc (void)
27071 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27072 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27073 if (TARGET_THUMB)
27074 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27075 sizeof (thumb_core_reg_alloc_order));
27078 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27080 bool
27081 arm_frame_pointer_required (void)
27083 return (cfun->has_nonlocal_label
27084 || SUBTARGET_FRAME_POINTER_REQUIRED
27085 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27088 /* Only thumb1 can't support conditional execution, so return true if
27089 the target is not thumb1. */
27090 static bool
27091 arm_have_conditional_execution (void)
27093 return !TARGET_THUMB1;
27096 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27097 static HOST_WIDE_INT
27098 arm_vector_alignment (const_tree type)
27100 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27102 if (TARGET_AAPCS_BASED)
27103 align = MIN (align, 64);
27105 return align;
27108 static unsigned int
27109 arm_autovectorize_vector_sizes (void)
27111 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27114 static bool
27115 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27117 /* Vectors which aren't in packed structures will not be less aligned than
27118 the natural alignment of their element type, so this is safe. */
27119 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27120 return !is_packed;
27122 return default_builtin_vector_alignment_reachable (type, is_packed);
27125 static bool
27126 arm_builtin_support_vector_misalignment (machine_mode mode,
27127 const_tree type, int misalignment,
27128 bool is_packed)
27130 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27132 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27134 if (is_packed)
27135 return align == 1;
27137 /* If the misalignment is unknown, we should be able to handle the access
27138 so long as it is not to a member of a packed data structure. */
27139 if (misalignment == -1)
27140 return true;
27142 /* Return true if the misalignment is a multiple of the natural alignment
27143 of the vector's element type. This is probably always going to be
27144 true in practice, since we've already established that this isn't a
27145 packed access. */
27146 return ((misalignment % align) == 0);
27149 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27150 is_packed);
27153 static void
27154 arm_conditional_register_usage (void)
27156 int regno;
27158 if (TARGET_THUMB1 && optimize_size)
27160 /* When optimizing for size on Thumb-1, it's better not
27161 to use the HI regs, because of the overhead of
27162 stacking them. */
27163 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27164 fixed_regs[regno] = call_used_regs[regno] = 1;
27167 /* The link register can be clobbered by any branch insn,
27168 but we have no way to track that at present, so mark
27169 it as unavailable. */
27170 if (TARGET_THUMB1)
27171 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27173 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27175 /* VFPv3 registers are disabled when earlier VFP
27176 versions are selected due to the definition of
27177 LAST_VFP_REGNUM. */
27178 for (regno = FIRST_VFP_REGNUM;
27179 regno <= LAST_VFP_REGNUM; ++ regno)
27181 fixed_regs[regno] = 0;
27182 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27183 || regno >= FIRST_VFP_REGNUM + 32;
27187 if (TARGET_REALLY_IWMMXT)
27189 regno = FIRST_IWMMXT_GR_REGNUM;
27190 /* The 2002/10/09 revision of the XScale ABI has wCG0
27191 and wCG1 as call-preserved registers. The 2002/11/21
27192 revision changed this so that all wCG registers are
27193 scratch registers. */
27194 for (regno = FIRST_IWMMXT_GR_REGNUM;
27195 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27196 fixed_regs[regno] = 0;
27197 /* The XScale ABI has wR0 - wR9 as scratch registers,
27198 the rest as call-preserved registers. */
27199 for (regno = FIRST_IWMMXT_REGNUM;
27200 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27202 fixed_regs[regno] = 0;
27203 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27207 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27209 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27210 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27212 else if (TARGET_APCS_STACK)
27214 fixed_regs[10] = 1;
27215 call_used_regs[10] = 1;
27217 /* -mcaller-super-interworking reserves r11 for calls to
27218 _interwork_r11_call_via_rN(). Making the register global
27219 is an easy way of ensuring that it remains valid for all
27220 calls. */
27221 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27222 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27224 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27225 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27226 if (TARGET_CALLER_INTERWORKING)
27227 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27229 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27232 static reg_class_t
27233 arm_preferred_rename_class (reg_class_t rclass)
27235 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27236 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27237 and code size can be reduced. */
27238 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27239 return LO_REGS;
27240 else
27241 return NO_REGS;
27244 /* Compute the attribute "length" of insn "*push_multi".
27245 So this function MUST be kept in sync with that insn pattern. */
27247 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27249 int i, regno, hi_reg;
27250 int num_saves = XVECLEN (parallel_op, 0);
27252 /* ARM mode. */
27253 if (TARGET_ARM)
27254 return 4;
27255 /* Thumb1 mode. */
27256 if (TARGET_THUMB1)
27257 return 2;
27259 /* Thumb2 mode. */
27260 regno = REGNO (first_op);
27261 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27262 for (i = 1; i < num_saves && !hi_reg; i++)
27264 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27265 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27268 if (!hi_reg)
27269 return 2;
27270 return 4;
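/* E.g. in Thumb-2, "push {r4, r5, lr}" uses the 16-bit encoding (length 2),
   while a list containing a high register other than lr, such as
   "push {r4, r8}", needs the 32-bit encoding (length 4).  */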
27273 /* Compute the number of instructions emitted by output_move_double. */
27275 arm_count_output_move_double_insns (rtx *operands)
27277 int count;
27278 rtx ops[2];
27279 /* output_move_double may modify the operands array, so call it
27280 here on a copy of the array. */
27281 ops[0] = operands[0];
27282 ops[1] = operands[1];
27283 output_move_double (ops, false, &count);
27284 return count;
27288 vfp3_const_double_for_fract_bits (rtx operand)
27290 REAL_VALUE_TYPE r0;
27292 if (!CONST_DOUBLE_P (operand))
27293 return 0;
27295 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27296 if (exact_real_inverse (DFmode, &r0))
27298 if (exact_real_truncate (DFmode, &r0))
27300 HOST_WIDE_INT value = real_to_integer (&r0);
27301 value = value & 0xffffffff;
27302 if ((value != 0) && ( (value & (value - 1)) == 0))
27303 return int_log2 (value);
27306 return 0;
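/* Example: for the constant 0.125 the exact inverse is 8.0, a power of two,
   so the function returns 3 (the number of fractional bits); for 0.3 it
   returns 0 because no exact power-of-two inverse exists.  */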
27310 vfp3_const_double_for_bits (rtx operand)
27312 REAL_VALUE_TYPE r0;
27314 if (!CONST_DOUBLE_P (operand))
27315 return 0;
27317 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27318 if (exact_real_truncate (DFmode, &r0))
27320 HOST_WIDE_INT value = real_to_integer (&r0);
27321 value = value & 0xffffffff;
27322 if ((value != 0) && ( (value & (value - 1)) == 0))
27323 return int_log2 (value);
27326 return 0;
27329 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27331 static void
27332 arm_pre_atomic_barrier (enum memmodel model)
27334 if (need_atomic_barrier_p (model, true))
27335 emit_insn (gen_memory_barrier ());
27338 static void
27339 arm_post_atomic_barrier (enum memmodel model)
27341 if (need_atomic_barrier_p (model, false))
27342 emit_insn (gen_memory_barrier ());
27345 /* Emit the load-exclusive and store-exclusive instructions.
27346 Use acquire and release versions if necessary. */
27348 static void
27349 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27351 rtx (*gen) (rtx, rtx);
27353 if (acq)
27355 switch (mode)
27357 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27358 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27359 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27360 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27361 default:
27362 gcc_unreachable ();
27365 else
27367 switch (mode)
27369 case QImode: gen = gen_arm_load_exclusiveqi; break;
27370 case HImode: gen = gen_arm_load_exclusivehi; break;
27371 case SImode: gen = gen_arm_load_exclusivesi; break;
27372 case DImode: gen = gen_arm_load_exclusivedi; break;
27373 default:
27374 gcc_unreachable ();
27378 emit_insn (gen (rval, mem));
27381 static void
27382 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27383 rtx mem, bool rel)
27385 rtx (*gen) (rtx, rtx, rtx);
27387 if (rel)
27389 switch (mode)
27391 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27392 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27393 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27394 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27395 default:
27396 gcc_unreachable ();
27399 else
27401 switch (mode)
27403 case QImode: gen = gen_arm_store_exclusiveqi; break;
27404 case HImode: gen = gen_arm_store_exclusivehi; break;
27405 case SImode: gen = gen_arm_store_exclusivesi; break;
27406 case DImode: gen = gen_arm_store_exclusivedi; break;
27407 default:
27408 gcc_unreachable ();
27412 emit_insn (gen (bval, rval, mem));
27415 /* Mark the previous jump instruction as unlikely. */
27417 static void
27418 emit_unlikely_jump (rtx insn)
27420 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27422 insn = emit_jump_insn (insn);
27423 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27426 /* Expand a compare and swap pattern. */
27428 void
27429 arm_expand_compare_and_swap (rtx operands[])
27431 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27432 machine_mode mode;
27433 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27435 bval = operands[0];
27436 rval = operands[1];
27437 mem = operands[2];
27438 oldval = operands[3];
27439 newval = operands[4];
27440 is_weak = operands[5];
27441 mod_s = operands[6];
27442 mod_f = operands[7];
27443 mode = GET_MODE (mem);
27445 /* Normally the succ memory model must be stronger than fail, but in the
27446 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27447 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27449 if (TARGET_HAVE_LDACQ
27450 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27451 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27452 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27454 switch (mode)
27456 case QImode:
27457 case HImode:
27458 /* For narrow modes, we're going to perform the comparison in SImode,
27459 so do the zero-extension now. */
27460 rval = gen_reg_rtx (SImode);
27461 oldval = convert_modes (SImode, mode, oldval, true);
27462 /* FALLTHRU */
27464 case SImode:
27465 /* Force the value into a register if needed. We waited until after
27466 the zero-extension above to do this properly. */
27467 if (!arm_add_operand (oldval, SImode))
27468 oldval = force_reg (SImode, oldval);
27469 break;
27471 case DImode:
27472 if (!cmpdi_operand (oldval, mode))
27473 oldval = force_reg (mode, oldval);
27474 break;
27476 default:
27477 gcc_unreachable ();
27480 switch (mode)
27482 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27483 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27484 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27485 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27486 default:
27487 gcc_unreachable ();
27490 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27492 if (mode == QImode || mode == HImode)
27493 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27495 /* In all cases, we arrange for success to be signaled by Z set.
27496 This arrangement allows for the boolean result to be used directly
27497 in a subsequent branch, post optimization. */
27498 x = gen_rtx_REG (CCmode, CC_REGNUM);
27499 x = gen_rtx_EQ (SImode, x, const0_rtx);
27500 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27503 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27504 another memory store between the load-exclusive and store-exclusive can
27505 reset the monitor from Exclusive to Open state. This means we must wait
27506 until after reload to split the pattern, lest we get a register spill in
27507 the middle of the atomic sequence. */
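/* Roughly, the split for the strong case is the usual LL/SC loop
   (illustrative sketch only):

     retry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     fail
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     retry
     fail:

   with barriers or the acquire/release forms of LDREX/STREX added as the
   memory model requires.  */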
27509 void
27510 arm_split_compare_and_swap (rtx operands[])
27512 rtx rval, mem, oldval, newval, scratch;
27513 machine_mode mode;
27514 enum memmodel mod_s, mod_f;
27515 bool is_weak;
27516 rtx_code_label *label1, *label2;
27517 rtx x, cond;
27519 rval = operands[0];
27520 mem = operands[1];
27521 oldval = operands[2];
27522 newval = operands[3];
27523 is_weak = (operands[4] != const0_rtx);
27524 mod_s = (enum memmodel) INTVAL (operands[5]);
27525 mod_f = (enum memmodel) INTVAL (operands[6]);
27526 scratch = operands[7];
27527 mode = GET_MODE (mem);
27529 bool use_acquire = TARGET_HAVE_LDACQ
27530 && !(mod_s == MEMMODEL_RELAXED
27531 || mod_s == MEMMODEL_CONSUME
27532 || mod_s == MEMMODEL_RELEASE);
27534 bool use_release = TARGET_HAVE_LDACQ
27535 && !(mod_s == MEMMODEL_RELAXED
27536 || mod_s == MEMMODEL_CONSUME
27537 || mod_s == MEMMODEL_ACQUIRE);
27539 /* Checks whether a barrier is needed and emits one accordingly. */
27540 if (!(use_acquire || use_release))
27541 arm_pre_atomic_barrier (mod_s);
27543 label1 = NULL;
27544 if (!is_weak)
27546 label1 = gen_label_rtx ();
27547 emit_label (label1);
27549 label2 = gen_label_rtx ();
27551 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27553 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27554 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27555 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27556 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27557 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27559 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27561 /* Weak or strong, we want EQ to be true for success, so that we
27562 match the flags that we got from the compare above. */
27563 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27564 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27565 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27567 if (!is_weak)
27569 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27570 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27571 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27572 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27575 if (mod_f != MEMMODEL_RELAXED)
27576 emit_label (label2);
27578 /* Checks whether a barrier is needed and emits one accordingly. */
27579 if (!(use_acquire || use_release))
27580 arm_post_atomic_barrier (mod_s);
27582 if (mod_f == MEMMODEL_RELAXED)
27583 emit_label (label2);
27586 void
27587 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27588 rtx value, rtx model_rtx, rtx cond)
27590 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27591 machine_mode mode = GET_MODE (mem);
27592 machine_mode wmode = (mode == DImode ? DImode : SImode);
27593 rtx_code_label *label;
27594 rtx x;
27596 bool use_acquire = TARGET_HAVE_LDACQ
27597 && !(model == MEMMODEL_RELAXED
27598 || model == MEMMODEL_CONSUME
27599 || model == MEMMODEL_RELEASE);
27601 bool use_release = TARGET_HAVE_LDACQ
27602 && !(model == MEMMODEL_RELAXED
27603 || model == MEMMODEL_CONSUME
27604 || model == MEMMODEL_ACQUIRE);
27606 /* Checks whether a barrier is needed and emits one accordingly. */
27607 if (!(use_acquire || use_release))
27608 arm_pre_atomic_barrier (model);
27610 label = gen_label_rtx ();
27611 emit_label (label);
27613 if (new_out)
27614 new_out = gen_lowpart (wmode, new_out);
27615 if (old_out)
27616 old_out = gen_lowpart (wmode, old_out);
27617 else
27618 old_out = new_out;
27619 value = simplify_gen_subreg (wmode, value, mode, 0);
27621 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27623 switch (code)
27625 case SET:
27626 new_out = value;
27627 break;
27629 case NOT:
27630 x = gen_rtx_AND (wmode, old_out, value);
27631 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27632 x = gen_rtx_NOT (wmode, new_out);
27633 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27634 break;
27636 case MINUS:
27637 if (CONST_INT_P (value))
27639 value = GEN_INT (-INTVAL (value));
27640 code = PLUS;
27642 /* FALLTHRU */
27644 case PLUS:
27645 if (mode == DImode)
27647 /* DImode plus/minus need to clobber flags. */
27648 /* The adddi3 and subdi3 patterns are incorrectly written so that
27649 they require matching operands, even when we could easily support
27650 three operands. Thankfully, this can be fixed up post-splitting,
27651 as the individual add+adc patterns do accept three operands and
27652 post-reload cprop can make these moves go away. */
27653 emit_move_insn (new_out, old_out);
27654 if (code == PLUS)
27655 x = gen_adddi3 (new_out, new_out, value);
27656 else
27657 x = gen_subdi3 (new_out, new_out, value);
27658 emit_insn (x);
27659 break;
27661 /* FALLTHRU */
27663 default:
27664 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27665 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27666 break;
27669 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27670 use_release);
27672 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27673 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27675 /* Checks whether a barrier is needed and emits one accordingly. */
27676 if (!(use_acquire || use_release))
27677 arm_post_atomic_barrier (model);
27680 #define MAX_VECT_LEN 16
27682 struct expand_vec_perm_d
27684 rtx target, op0, op1;
27685 unsigned char perm[MAX_VECT_LEN];
27686 machine_mode vmode;
27687 unsigned char nelt;
27688 bool one_vector_p;
27689 bool testing_p;
27692 /* Generate a variable permutation. */
27694 static void
27695 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27697 machine_mode vmode = GET_MODE (target);
27698 bool one_vector_p = rtx_equal_p (op0, op1);
27700 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27701 gcc_checking_assert (GET_MODE (op0) == vmode);
27702 gcc_checking_assert (GET_MODE (op1) == vmode);
27703 gcc_checking_assert (GET_MODE (sel) == vmode);
27704 gcc_checking_assert (TARGET_NEON);
27706 if (one_vector_p)
27708 if (vmode == V8QImode)
27709 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27710 else
27711 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27713 else
27715 rtx pair;
27717 if (vmode == V8QImode)
27719 pair = gen_reg_rtx (V16QImode);
27720 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27721 pair = gen_lowpart (TImode, pair);
27722 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27724 else
27726 pair = gen_reg_rtx (OImode);
27727 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27728 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27733 void
27734 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27736 machine_mode vmode = GET_MODE (target);
27737 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27738 bool one_vector_p = rtx_equal_p (op0, op1);
27739 rtx rmask[MAX_VECT_LEN], mask;
27741 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27742 numbering of elements for big-endian, we must reverse the order. */
27743 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27745 /* The VTBL instruction does not use a modulo index, so we must take care
27746 of that ourselves. */
27747 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27748 for (i = 0; i < nelt; ++i)
27749 rmask[i] = mask;
27750 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27751 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27753 arm_expand_vec_perm_1 (target, op0, op1, sel);
27756 /* Generate or test for an insn that supports a constant permutation. */
27758 /* Recognize patterns for the VUZP insns. */
27760 static bool
27761 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27763 unsigned int i, odd, mask, nelt = d->nelt;
27764 rtx out0, out1, in0, in1, x;
27765 rtx (*gen)(rtx, rtx, rtx, rtx);
27767 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27768 return false;
27770 /* Note that these are little-endian tests. Adjust for big-endian later. */
27771 if (d->perm[0] == 0)
27772 odd = 0;
27773 else if (d->perm[0] == 1)
27774 odd = 1;
27775 else
27776 return false;
27777 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27779 for (i = 0; i < nelt; i++)
27781 unsigned elt = (i * 2 + odd) & mask;
27782 if (d->perm[i] != elt)
27783 return false;
27786 /* Success! */
27787 if (d->testing_p)
27788 return true;
27790 switch (d->vmode)
27792 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27793 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27794 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27795 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27796 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27797 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27798 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27799 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27800 default:
27801 gcc_unreachable ();
27804 in0 = d->op0;
27805 in1 = d->op1;
27806 if (BYTES_BIG_ENDIAN)
27808 x = in0, in0 = in1, in1 = x;
27809 odd = !odd;
27812 out0 = d->target;
27813 out1 = gen_reg_rtx (d->vmode);
27814 if (odd)
27815 x = out0, out0 = out1, out1 = x;
27817 emit_insn (gen (out0, in0, in1, out1));
27818 return true;
27821 /* Recognize patterns for the VZIP insns. */
27823 static bool
27824 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27826 unsigned int i, high, mask, nelt = d->nelt;
27827 rtx out0, out1, in0, in1, x;
27828 rtx (*gen)(rtx, rtx, rtx, rtx);
27830 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27831 return false;
27833 /* Note that these are little-endian tests. Adjust for big-endian later. */
27834 high = nelt / 2;
27835 if (d->perm[0] == high)
27837 else if (d->perm[0] == 0)
27838 high = 0;
27839 else
27840 return false;
27841 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27843 for (i = 0; i < nelt / 2; i++)
27845 unsigned elt = (i + high) & mask;
27846 if (d->perm[i * 2] != elt)
27847 return false;
27848 elt = (elt + nelt) & mask;
27849 if (d->perm[i * 2 + 1] != elt)
27850 return false;
27853 /* Success! */
27854 if (d->testing_p)
27855 return true;
27857 switch (d->vmode)
27859 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27860 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27861 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27862 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27863 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27864 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27865 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27866 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27867 default:
27868 gcc_unreachable ();
27871 in0 = d->op0;
27872 in1 = d->op1;
27873 if (BYTES_BIG_ENDIAN)
27875 x = in0, in0 = in1, in1 = x;
27876 high = !high;
27879 out0 = d->target;
27880 out1 = gen_reg_rtx (d->vmode);
27881 if (high)
27882 x = out0, out0 = out1, out1 = x;
27884 emit_insn (gen (out0, in0, in1, out1));
27885 return true;
27888 /* Recognize patterns for the VREV insns. */
27890 static bool
27891 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27893 unsigned int i, j, diff, nelt = d->nelt;
27894 rtx (*gen)(rtx, rtx);
27896 if (!d->one_vector_p)
27897 return false;
27899 diff = d->perm[0];
27900 switch (diff)
27902 case 7:
27903 switch (d->vmode)
27905 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27906 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27907 default:
27908 return false;
27910 break;
27911 case 3:
27912 switch (d->vmode)
27914 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27915 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27916 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27917 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27918 default:
27919 return false;
27921 break;
27922 case 1:
27923 switch (d->vmode)
27925 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27926 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27927 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27928 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27929 case V4SImode: gen = gen_neon_vrev64v4si; break;
27930 case V2SImode: gen = gen_neon_vrev64v2si; break;
27931 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27932 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27933 default:
27934 return false;
27936 break;
27937 default:
27938 return false;
27941 for (i = 0; i < nelt ; i += diff + 1)
27942 for (j = 0; j <= diff; j += 1)
27944 /* This is guaranteed to be true, as the value of diff
27945 is 7, 3 or 1 and we should have enough elements in the
27946 vector to satisfy this. Getting a vector mask with a
27947 value of diff other than these values implies that
27948 something is wrong by the time we get here. */
27949 gcc_assert (i + j < nelt);
27950 if (d->perm[i + j] != i + diff - j)
27951 return false;
27954 /* Success! */
27955 if (d->testing_p)
27956 return true;
27958 emit_insn (gen (d->target, d->op0));
27959 return true;
27962 /* Recognize patterns for the VTRN insns. */
27964 static bool
27965 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27967 unsigned int i, odd, mask, nelt = d->nelt;
27968 rtx out0, out1, in0, in1, x;
27969 rtx (*gen)(rtx, rtx, rtx, rtx);
27971 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27972 return false;
27974 /* Note that these are little-endian tests. Adjust for big-endian later. */
27975 if (d->perm[0] == 0)
27976 odd = 0;
27977 else if (d->perm[0] == 1)
27978 odd = 1;
27979 else
27980 return false;
27981 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27983 for (i = 0; i < nelt; i += 2)
27985 if (d->perm[i] != i + odd)
27986 return false;
27987 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
27988 return false;
27991 /* Success! */
27992 if (d->testing_p)
27993 return true;
27995 switch (d->vmode)
27997 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
27998 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
27999 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28000 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28001 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28002 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28003 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28004 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28005 default:
28006 gcc_unreachable ();
28009 in0 = d->op0;
28010 in1 = d->op1;
28011 if (BYTES_BIG_ENDIAN)
28013 x = in0, in0 = in1, in1 = x;
28014 odd = !odd;
28017 out0 = d->target;
28018 out1 = gen_reg_rtx (d->vmode);
28019 if (odd)
28020 x = out0, out0 = out1, out1 = x;
28022 emit_insn (gen (out0, in0, in1, out1));
28023 return true;
28026 /* Recognize patterns for the VEXT insns. */
28028 static bool
28029 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28031 unsigned int i, nelt = d->nelt;
28032 rtx (*gen) (rtx, rtx, rtx, rtx);
28033 rtx offset;
28035 unsigned int location;
28037 unsigned int next = d->perm[0] + 1;
28039 /* TODO: Handle GCC's numbering of elements for big-endian. */
28040 if (BYTES_BIG_ENDIAN)
28041 return false;
28043 /* Check if the extracted indexes are increasing by one. */
28044 for (i = 1; i < nelt; next++, i++)
28046 /* If we hit the most significant element of the 2nd vector in
28047 the previous iteration, no need to test further. */
28048 if (next == 2 * nelt)
28049 return false;
28051 /* If we are operating on only one vector: it could be a
28052 rotation. If there are only two elements of size < 64, let
28053 arm_evpc_neon_vrev catch it. */
28054 if (d->one_vector_p && (next == nelt))
28056 if ((nelt == 2) && (d->vmode != V2DImode))
28057 return false;
28058 else
28059 next = 0;
28062 if (d->perm[i] != next)
28063 return false;
28066 location = d->perm[0];
28068 switch (d->vmode)
28070 case V16QImode: gen = gen_neon_vextv16qi; break;
28071 case V8QImode: gen = gen_neon_vextv8qi; break;
28072 case V4HImode: gen = gen_neon_vextv4hi; break;
28073 case V8HImode: gen = gen_neon_vextv8hi; break;
28074 case V2SImode: gen = gen_neon_vextv2si; break;
28075 case V4SImode: gen = gen_neon_vextv4si; break;
28076 case V2SFmode: gen = gen_neon_vextv2sf; break;
28077 case V4SFmode: gen = gen_neon_vextv4sf; break;
28078 case V2DImode: gen = gen_neon_vextv2di; break;
28079 default:
28080 return false;
28083 /* Success! */
28084 if (d->testing_p)
28085 return true;
28087 offset = GEN_INT (location);
28088 emit_insn (gen (d->target, d->op0, d->op1, offset));
28089 return true;
28092 /* The NEON VTBL instruction is a fully variable permutation that's even
28093 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28094 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28095 can do slightly better by expanding this as a constant where we don't
28096 have to apply a mask. */
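/* For instance (illustrative): a constant V8QImode selector such as
   { 3, 2, 1, 0, 7, 6, 5, 4 } is loaded into a register as-is and fed
   straight to VTBL via arm_expand_vec_perm_1, with no preceding AND to
   mask the indices.  */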
28098 static bool
28099 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28101 rtx rperm[MAX_VECT_LEN], sel;
28102 machine_mode vmode = d->vmode;
28103 unsigned int i, nelt = d->nelt;
28105 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28106 numbering of elements for big-endian, we must reverse the order. */
28107 if (BYTES_BIG_ENDIAN)
28108 return false;
28110 if (d->testing_p)
28111 return true;
28113 /* Generic code will try constant permutation twice. Once with the
28114 original mode and again with the elements lowered to QImode.
28115 So wait and don't do the selector expansion ourselves. */
28116 if (vmode != V8QImode && vmode != V16QImode)
28117 return false;
28119 for (i = 0; i < nelt; ++i)
28120 rperm[i] = GEN_INT (d->perm[i]);
28121 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28122 sel = force_reg (vmode, sel);
28124 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28125 return true;
28128 static bool
28129 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28131 /* Check if the input mask matches vext before reordering the
28132 operands. */
28133 if (TARGET_NEON)
28134 if (arm_evpc_neon_vext (d))
28135 return true;
28137 /* The pattern matching functions above are written to look for a small
28138 number to begin the sequence (0, 1, N/2). If we begin with an index
28139 from the second operand, we can swap the operands. */
28140 if (d->perm[0] >= d->nelt)
28142 unsigned i, nelt = d->nelt;
28143 rtx x;
28145 for (i = 0; i < nelt; ++i)
28146 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28148 x = d->op0;
28149 d->op0 = d->op1;
28150 d->op1 = x;
28153 if (TARGET_NEON)
28155 if (arm_evpc_neon_vuzp (d))
28156 return true;
28157 if (arm_evpc_neon_vzip (d))
28158 return true;
28159 if (arm_evpc_neon_vrev (d))
28160 return true;
28161 if (arm_evpc_neon_vtrn (d))
28162 return true;
28163 return arm_evpc_neon_vtbl (d);
28165 return false;
28168 /* Expand a vec_perm_const pattern. */
28170 bool
28171 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28173 struct expand_vec_perm_d d;
28174 int i, nelt, which;
28176 d.target = target;
28177 d.op0 = op0;
28178 d.op1 = op1;
28180 d.vmode = GET_MODE (target);
28181 gcc_assert (VECTOR_MODE_P (d.vmode));
28182 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28183 d.testing_p = false;
28185 for (i = which = 0; i < nelt; ++i)
28187 rtx e = XVECEXP (sel, 0, i);
28188 int ei = INTVAL (e) & (2 * nelt - 1);
28189 which |= (ei < nelt ? 1 : 2);
28190 d.perm[i] = ei;
28193 switch (which)
28195 default:
28196 gcc_unreachable();
28198 case 3:
28199 d.one_vector_p = false;
28200 if (!rtx_equal_p (op0, op1))
28201 break;
28203 /* The elements of PERM do not suggest that only the first operand
28204 is used, but both operands are identical. Allow easier matching
28205 of the permutation by folding the permutation into the single
28206 input vector. */
28207 /* FALLTHRU */
28208 case 2:
28209 for (i = 0; i < nelt; ++i)
28210 d.perm[i] &= nelt - 1;
28211 d.op0 = op1;
28212 d.one_vector_p = true;
28213 break;
28215 case 1:
28216 d.op1 = op0;
28217 d.one_vector_p = true;
28218 break;
28221 return arm_expand_vec_perm_const_1 (&d);
28224 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28226 static bool
28227 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28228 const unsigned char *sel)
28230 struct expand_vec_perm_d d;
28231 unsigned int i, nelt, which;
28232 bool ret;
28234 d.vmode = vmode;
28235 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28236 d.testing_p = true;
28237 memcpy (d.perm, sel, nelt);
28239 /* Categorize the set of elements in the selector. */
28240 for (i = which = 0; i < nelt; ++i)
28242 unsigned char e = d.perm[i];
28243 gcc_assert (e < 2 * nelt);
28244 which |= (e < nelt ? 1 : 2);
28247 /* For all elements from second vector, fold the elements to first. */
28248 if (which == 2)
28249 for (i = 0; i < nelt; ++i)
28250 d.perm[i] -= nelt;
28252 /* Check whether the mask can be applied to the vector type. */
28253 d.one_vector_p = (which != 3);
28255 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28256 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28257 if (!d.one_vector_p)
28258 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28260 start_sequence ();
28261 ret = arm_expand_vec_perm_const_1 (&d);
28262 end_sequence ();
28264 return ret;
28267 bool
28268 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28270 /* If we are soft float and either have ldrd or the mode fits in a
28271 single word, then all auto increment forms are ok. */
28272 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28273 return true;
28275 switch (code)
28277 /* Post-increment is supported for all instruction forms; pre-decrement
28278 is supported for everything except vector forms. */
28279 case ARM_POST_INC:
28280 case ARM_PRE_DEC:
28281 if (VECTOR_MODE_P (mode))
28283 if (code != ARM_PRE_DEC)
28284 return true;
28285 else
28286 return false;
28289 return true;
28291 case ARM_POST_DEC:
28292 case ARM_PRE_INC:
28293 /* Without LDRD and mode size greater than
28294 word size, there is no point in auto-incrementing
28295 because ldm and stm will not have these forms. */
28296 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28297 return false;
28299 /* Vector and floating point modes do not support
28300 these auto increment forms. */
28301 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28302 return false;
28304 return true;
28306 default:
28307 return false;
28311 return false;
28314 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28315 on ARM, since we know that shifts by negative amounts are no-ops.
28316 Additionally, the default expansion code is not available or suitable
28317 for post-reload insn splits (this can occur when the register allocator
28318 chooses not to do a shift in NEON).
28320 This function is used in both initial expand and post-reload splits, and
28321 handles all kinds of 64-bit shifts.
28323 Input requirements:
28324 - It is safe for the input and output to be the same register, but
28325 early-clobber rules apply for the shift amount and scratch registers.
28326 - Shift by register requires both scratch registers. In all other cases
28327 the scratch registers may be NULL.
28328 - Ashiftrt by a register also clobbers the CC register. */
28329 void
28330 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28331 rtx amount, rtx scratch1, rtx scratch2)
28333 rtx out_high = gen_highpart (SImode, out);
28334 rtx out_low = gen_lowpart (SImode, out);
28335 rtx in_high = gen_highpart (SImode, in);
28336 rtx in_low = gen_lowpart (SImode, in);
28338 /* Terminology:
28339 in = the register pair containing the input value.
28340 out = the destination register pair.
28341 up = the high- or low-part of each pair.
28342 down = the opposite part to "up".
28343 In a shift, we can consider bits to shift from "up"-stream to
28344 "down"-stream, so in a left-shift "up" is the low-part and "down"
28345 is the high-part of each register pair. */
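/* Worked example (illustrative): for a 64-bit LSHIFTRT by the constant 10,
   the code below emits
     out_low  = (in_low >> 10) | (in_high << 22);
     out_high = in_high >> 10;
   and for an ASHIFT by 40 (i.e. >= 32) it emits
     out_high = in_low << 8;
     out_low  = 0;  */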
28347 rtx out_up = code == ASHIFT ? out_low : out_high;
28348 rtx out_down = code == ASHIFT ? out_high : out_low;
28349 rtx in_up = code == ASHIFT ? in_low : in_high;
28350 rtx in_down = code == ASHIFT ? in_high : in_low;
28352 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28353 gcc_assert (out
28354 && (REG_P (out) || GET_CODE (out) == SUBREG)
28355 && GET_MODE (out) == DImode);
28356 gcc_assert (in
28357 && (REG_P (in) || GET_CODE (in) == SUBREG)
28358 && GET_MODE (in) == DImode);
28359 gcc_assert (amount
28360 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28361 && GET_MODE (amount) == SImode)
28362 || CONST_INT_P (amount)));
28363 gcc_assert (scratch1 == NULL
28364 || (GET_CODE (scratch1) == SCRATCH)
28365 || (GET_MODE (scratch1) == SImode
28366 && REG_P (scratch1)));
28367 gcc_assert (scratch2 == NULL
28368 || (GET_CODE (scratch2) == SCRATCH)
28369 || (GET_MODE (scratch2) == SImode
28370 && REG_P (scratch2)));
28371 gcc_assert (!REG_P (out) || !REG_P (amount)
28372 || !HARD_REGISTER_P (out)
28373 || (REGNO (out) != REGNO (amount)
28374 && REGNO (out) + 1 != REGNO (amount)));
28376 /* Macros to make following code more readable. */
28377 #define SUB_32(DEST,SRC) \
28378 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28379 #define RSB_32(DEST,SRC) \
28380 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28381 #define SUB_S_32(DEST,SRC) \
28382 gen_addsi3_compare0 ((DEST), (SRC), \
28383 GEN_INT (-32))
28384 #define SET(DEST,SRC) \
28385 gen_rtx_SET (SImode, (DEST), (SRC))
28386 #define SHIFT(CODE,SRC,AMOUNT) \
28387 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28388 #define LSHIFT(CODE,SRC,AMOUNT) \
28389 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28390 SImode, (SRC), (AMOUNT))
28391 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28392 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28393 SImode, (SRC), (AMOUNT))
28394 #define ORR(A,B) \
28395 gen_rtx_IOR (SImode, (A), (B))
28396 #define BRANCH(COND,LABEL) \
28397 gen_arm_cond_branch ((LABEL), \
28398 gen_rtx_ ## COND (CCmode, cc_reg, \
28399 const0_rtx), \
28400 cc_reg)
28402 /* Shifts by register and shifts by constant are handled separately. */
28403 if (CONST_INT_P (amount))
28405 /* We have a shift-by-constant. */
28407 /* First, handle out-of-range shift amounts.
28408 In both cases we try to match the result an ARM instruction in a
28409 shift-by-register would give. This helps reduce execution
28410 differences between optimization levels, but it won't stop other
28411 parts of the compiler doing different things. This is "undefined
28412 behaviour", in any case. */
28413 if (INTVAL (amount) <= 0)
28414 emit_insn (gen_movdi (out, in));
28415 else if (INTVAL (amount) >= 64)
28417 if (code == ASHIFTRT)
28419 rtx const31_rtx = GEN_INT (31);
28420 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28421 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28423 else
28424 emit_insn (gen_movdi (out, const0_rtx));
28427 /* Now handle valid shifts. */
28428 else if (INTVAL (amount) < 32)
28430 /* Shifts by a constant less than 32. */
28431 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28433 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28434 emit_insn (SET (out_down,
28435 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28436 out_down)));
28437 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28439 else
28441 /* Shifts by a constant greater than 31. */
28442 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28444 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28445 if (code == ASHIFTRT)
28446 emit_insn (gen_ashrsi3 (out_up, in_up,
28447 GEN_INT (31)));
28448 else
28449 emit_insn (SET (out_up, const0_rtx));
28452 else
28454 /* We have a shift-by-register. */
28455 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28457 /* This alternative requires the scratch registers. */
28458 gcc_assert (scratch1 && REG_P (scratch1));
28459 gcc_assert (scratch2 && REG_P (scratch2));
28461 /* We will need the values "amount-32" and "32-amount" later.
28462 Swapping them around now allows the later code to be more general. */
28463 switch (code)
28465 case ASHIFT:
28466 emit_insn (SUB_32 (scratch1, amount));
28467 emit_insn (RSB_32 (scratch2, amount));
28468 break;
28469 case ASHIFTRT:
28470 emit_insn (RSB_32 (scratch1, amount));
28471 /* Also set CC = amount > 32. */
28472 emit_insn (SUB_S_32 (scratch2, amount));
28473 break;
28474 case LSHIFTRT:
28475 emit_insn (RSB_32 (scratch1, amount));
28476 emit_insn (SUB_32 (scratch2, amount));
28477 break;
28478 default:
28479 gcc_unreachable ();
28482 /* Emit code like this:
28484 arithmetic-left:
28485 out_down = in_down << amount;
28486 out_down = (in_up << (amount - 32)) | out_down;
28487 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28488 out_up = in_up << amount;
28490 arithmetic-right:
28491 out_down = in_down >> amount;
28492 out_down = (in_up << (32 - amount)) | out_down;
28493 if (amount >= 32)
28494 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28495 out_up = in_up >> amount;
28497 logical-right:
28498 out_down = in_down >> amount;
28499 out_down = (in_up << (32 - amount)) | out_down;
28500 if (amount >= 32)
28501 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28502 out_up = in_up >> amount;
28504 The ARM and Thumb2 variants are the same but implemented slightly
28505 differently. If this were only called during expand we could just
28506 use the Thumb2 case and let combine do the right thing, but this
28507 can also be called from post-reload splitters. */
28509 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28511 if (!TARGET_THUMB2)
28513 /* Emit code for ARM mode. */
28514 emit_insn (SET (out_down,
28515 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28516 if (code == ASHIFTRT)
28518 rtx_code_label *done_label = gen_label_rtx ();
28519 emit_jump_insn (BRANCH (LT, done_label));
28520 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28521 out_down)));
28522 emit_label (done_label);
28524 else
28525 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28526 out_down)));
28528 else
28530 /* Emit code for Thumb2 mode.
28531 Thumb2 can't do shift and or in one insn. */
28532 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28533 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28535 if (code == ASHIFTRT)
28537 rtx_code_label *done_label = gen_label_rtx ();
28538 emit_jump_insn (BRANCH (LT, done_label));
28539 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28540 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28541 emit_label (done_label);
28543 else
28545 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28546 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28550 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28553 #undef SUB_32
28554 #undef RSB_32
28555 #undef SUB_S_32
28556 #undef SET
28557 #undef SHIFT
28558 #undef LSHIFT
28559 #undef REV_LSHIFT
28560 #undef ORR
28561 #undef BRANCH
28565 /* Return true if *COMPARISON is a valid comparison operation, and put
28566 the operands into a form that is valid for that comparison. */
28567 bool
28568 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28570 enum rtx_code code = GET_CODE (*comparison);
28571 int code_int;
28572 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28573 ? GET_MODE (*op2) : GET_MODE (*op1);
28575 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28577 if (code == UNEQ || code == LTGT)
28578 return false;
28580 code_int = (int)code;
28581 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28582 PUT_CODE (*comparison, (enum rtx_code)code_int);
28584 switch (mode)
28586 case SImode:
28587 if (!arm_add_operand (*op1, mode))
28588 *op1 = force_reg (mode, *op1);
28589 if (!arm_add_operand (*op2, mode))
28590 *op2 = force_reg (mode, *op2);
28591 return true;
28593 case DImode:
28594 if (!cmpdi_operand (*op1, mode))
28595 *op1 = force_reg (mode, *op1);
28596 if (!cmpdi_operand (*op2, mode))
28597 *op2 = force_reg (mode, *op2);
28598 return true;
28600 case SFmode:
28601 case DFmode:
28602 if (!arm_float_compare_operand (*op1, mode))
28603 *op1 = force_reg (mode, *op1);
28604 if (!arm_float_compare_operand (*op2, mode))
28605 *op2 = force_reg (mode, *op2);
28606 return true;
28607 default:
28608 break;
28611 return false;
28615 /* Maximum number of instructions to set block of memory. */
28616 static int
28617 arm_block_set_max_insns (void)
28619 if (optimize_function_for_size_p (cfun))
28620 return 4;
28621 else
28622 return current_tune->max_insns_inline_memset;
28625 /* Return TRUE if it's profitable to set block of memory for
28626 non-vectorized case. VAL is the value to set the memory
28627 with. LENGTH is the number of bytes to set. ALIGN is the
28628 alignment of the destination memory in bytes. UNALIGNED_P
28629 is TRUE if we can only set the memory with instructions
28630 meeting alignment requirements. USE_STRD_P is TRUE if we
28631 can use strd to set the memory. */
28632 static bool
28633 arm_block_set_non_vect_profit_p (rtx val,
28634 unsigned HOST_WIDE_INT length,
28635 unsigned HOST_WIDE_INT align,
28636 bool unaligned_p, bool use_strd_p)
28638 int num = 0;
28639 /* For leftovers in bytes of 0-7, we can set the memory block using
28640 strb/strh/str with minimum instruction number. */
28641 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
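/* For example, 7 leftover bytes are handled as STR + STRH + STRB, hence
   leftover[7] == 3.  */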
28643 if (unaligned_p)
28645 num = arm_const_inline_cost (SET, val);
28646 num += length / align + length % align;
28648 else if (use_strd_p)
28650 num = arm_const_double_inline_cost (val);
28651 num += (length >> 3) + leftover[length & 7];
28653 else
28655 num = arm_const_inline_cost (SET, val);
28656 num += (length >> 2) + leftover[length & 3];
28659 /* We may be able to combine last pair STRH/STRB into a single STR
28660 by shifting one byte back. */
28661 if (unaligned_access && length > 3 && (length & 3) == 3)
28662 num--;
28664 return (num <= arm_block_set_max_insns ());
28667 /* Return TRUE if it's profitable to set block of memory for
28668 vectorized case. LENGTH is the number of bytes to set.
28669 ALIGN is the alignment of destination memory in bytes.
28670 MODE is the vector mode used to set the memory. */
28671 static bool
28672 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28673 unsigned HOST_WIDE_INT align,
28674 machine_mode mode)
28676 int num;
28677 bool unaligned_p = ((align & 3) != 0);
28678 unsigned int nelt = GET_MODE_NUNITS (mode);
28680 /* Instruction loading constant value. */
28681 num = 1;
28682 /* Instructions storing the memory. */
28683 num += (length + nelt - 1) / nelt;
28684 /* Instructions adjusting the address expression. Only need to
28685 adjust address expression if it's 4 bytes aligned and bytes
28686 leftover can only be stored by mis-aligned store instruction. */
28687 if (!unaligned_p && (length & 3) != 0)
28688 num++;
28690 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28691 if (!unaligned_p && mode == V16QImode)
28692 num--;
28694 return (num <= arm_block_set_max_insns ());
28697 /* Set a block of memory using vectorization instructions for the
28698 unaligned case. We fill the first LENGTH bytes of the memory
28699 area starting from DSTBASE with byte constant VALUE. ALIGN is
28700 the alignment requirement of memory. Return TRUE if succeeded. */
28701 static bool
28702 arm_block_set_unaligned_vect (rtx dstbase,
28703 unsigned HOST_WIDE_INT length,
28704 unsigned HOST_WIDE_INT value,
28705 unsigned HOST_WIDE_INT align)
28707 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28708 rtx dst, mem;
28709 rtx val_elt, val_vec, reg;
28710 rtx rval[MAX_VECT_LEN];
28711 rtx (*gen_func) (rtx, rtx);
28712 machine_mode mode;
28713 unsigned HOST_WIDE_INT v = value;
28715 gcc_assert ((align & 0x3) != 0);
28716 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28717 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28718 if (length >= nelt_v16)
28720 mode = V16QImode;
28721 gen_func = gen_movmisalignv16qi;
28723 else
28725 mode = V8QImode;
28726 gen_func = gen_movmisalignv8qi;
28728 nelt_mode = GET_MODE_NUNITS (mode);
28729 gcc_assert (length >= nelt_mode);
28730 /* Skip if it isn't profitable. */
28731 if (!arm_block_set_vect_profit_p (length, align, mode))
28732 return false;
28734 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28735 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28737 v = sext_hwi (v, BITS_PER_WORD);
28738 val_elt = GEN_INT (v);
28739 for (j = 0; j < nelt_mode; j++)
28740 rval[j] = val_elt;
28742 reg = gen_reg_rtx (mode);
28743 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28744 /* Emit instruction loading the constant value. */
28745 emit_move_insn (reg, val_vec);
28747 /* Handle nelt_mode bytes in a vector. */
28748 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28750 emit_insn ((*gen_func) (mem, reg));
28751 if (i + 2 * nelt_mode <= length)
28752 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28755 /* If there are not less than nelt_v8 bytes leftover, we must be in
28756 V16QI mode. */
28757 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28759 /* Handle (8, 16) bytes leftover. */
28760 if (i + nelt_v8 < length)
28762 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28763 /* We are shifting bytes back, set the alignment accordingly. */
28764 if ((length & 1) != 0 && align >= 2)
28765 set_mem_align (mem, BITS_PER_UNIT);
28767 emit_insn (gen_movmisalignv16qi (mem, reg));
28769 /* Handle (0, 8] bytes leftover. */
28770 else if (i < length && i + nelt_v8 >= length)
28772 if (mode == V16QImode)
28774 reg = gen_lowpart (V8QImode, reg);
28775 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28777 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28778 + (nelt_mode - nelt_v8))));
28779 /* We are shifting bytes back, set the alignment accordingly. */
28780 if ((length & 1) != 0 && align >= 2)
28781 set_mem_align (mem, BITS_PER_UNIT);
28783 emit_insn (gen_movmisalignv8qi (mem, reg));
28786 return true;
28789 /* Set a block of memory using vectorization instructions for the
28790 aligned case. We fill the first LENGTH bytes of the memory area
28791 starting from DSTBASE with byte constant VALUE. ALIGN is the
28792 alignment requirement of memory. Return TRUE if succeeded. */
28793 static bool
28794 arm_block_set_aligned_vect (rtx dstbase,
28795 unsigned HOST_WIDE_INT length,
28796 unsigned HOST_WIDE_INT value,
28797 unsigned HOST_WIDE_INT align)
28799 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28800 rtx dst, addr, mem;
28801 rtx val_elt, val_vec, reg;
28802 rtx rval[MAX_VECT_LEN];
28803 machine_mode mode;
28804 unsigned HOST_WIDE_INT v = value;
28806 gcc_assert ((align & 0x3) == 0);
28807 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28808 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28809 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28810 mode = V16QImode;
28811 else
28812 mode = V8QImode;
28814 nelt_mode = GET_MODE_NUNITS (mode);
28815 gcc_assert (length >= nelt_mode);
28816 /* Skip if it isn't profitable. */
28817 if (!arm_block_set_vect_profit_p (length, align, mode))
28818 return false;
28820 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28822 v = sext_hwi (v, BITS_PER_WORD);
28823 val_elt = GEN_INT (v);
28824 for (j = 0; j < nelt_mode; j++)
28825 rval[j] = val_elt;
28827 reg = gen_reg_rtx (mode);
28828 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28829 /* Emit instruction loading the constant value. */
28830 emit_move_insn (reg, val_vec);
28832 i = 0;
28833 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28834 if (mode == V16QImode)
28836 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28837 emit_insn (gen_movmisalignv16qi (mem, reg));
28838 i += nelt_mode;
28839 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28840 if (i + nelt_v8 < length && i + nelt_v16 > length)
28842 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28843 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28844 /* We are shifting bytes back, set the alignment accordingly. */
28845 if ((length & 0x3) == 0)
28846 set_mem_align (mem, BITS_PER_UNIT * 4);
28847 else if ((length & 0x1) == 0)
28848 set_mem_align (mem, BITS_PER_UNIT * 2);
28849 else
28850 set_mem_align (mem, BITS_PER_UNIT);
28852 emit_insn (gen_movmisalignv16qi (mem, reg));
28853 return true;
28855 /* Fall through for bytes leftover. */
28856 mode = V8QImode;
28857 nelt_mode = GET_MODE_NUNITS (mode);
28858 reg = gen_lowpart (V8QImode, reg);
28861 /* Handle 8 bytes in a vector. */
28862 for (; (i + nelt_mode <= length); i += nelt_mode)
28864 addr = plus_constant (Pmode, dst, i);
28865 mem = adjust_automodify_address (dstbase, mode, addr, i);
28866 emit_move_insn (mem, reg);
28869 /* Handle single word leftover by shifting 4 bytes back. We can
28870 use aligned access for this case. */
28871 if (i + UNITS_PER_WORD == length)
28873 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28874 mem = adjust_automodify_address (dstbase, mode,
28875 addr, i - UNITS_PER_WORD);
28876 /* We are shifting 4 bytes back, set the alignment accordingly. */
28877 if (align > UNITS_PER_WORD)
28878 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28880 emit_move_insn (mem, reg);
28882 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28883 We have to use unaligned access for this case. */
28884 else if (i < length)
28886 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28887 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28888 /* We are shifting bytes back, set the alignment accordingly. */
28889 if ((length & 1) == 0)
28890 set_mem_align (mem, BITS_PER_UNIT * 2);
28891 else
28892 set_mem_align (mem, BITS_PER_UNIT);
28894 emit_insn (gen_movmisalignv8qi (mem, reg));
28897 return true;
28900 /* Set a block of memory using plain strh/strb instructions, using
28901 only instructions allowed by the alignment ALIGN. We fill the
28902 first LENGTH bytes of the memory area starting from DSTBASE
28903 with byte constant VALUE. ALIGN is the alignment requirement
28904 of the memory. */
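/* For example (illustrative): an 8-byte set of a 2-byte-aligned
   destination is emitted as four STRHs, with a trailing STRB when the
   length is odd.  */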
28905 static bool
28906 arm_block_set_unaligned_non_vect (rtx dstbase,
28907 unsigned HOST_WIDE_INT length,
28908 unsigned HOST_WIDE_INT value,
28909 unsigned HOST_WIDE_INT align)
28911 unsigned int i;
28912 rtx dst, addr, mem;
28913 rtx val_exp, val_reg, reg;
28914 machine_mode mode;
28915 HOST_WIDE_INT v = value;
28917 gcc_assert (align == 1 || align == 2);
28919 if (align == 2)
28920 v |= (value << BITS_PER_UNIT);
28922 v = sext_hwi (v, BITS_PER_WORD);
28923 val_exp = GEN_INT (v);
28924 /* Skip if it isn't profitable. */
28925 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28926 align, true, false))
28927 return false;
28929 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28930 mode = (align == 2 ? HImode : QImode);
28931 val_reg = force_reg (SImode, val_exp);
28932 reg = gen_lowpart (mode, val_reg);
28934 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28936 addr = plus_constant (Pmode, dst, i);
28937 mem = adjust_automodify_address (dstbase, mode, addr, i);
28938 emit_move_insn (mem, reg);
28941 /* Handle single byte leftover. */
28942 if (i + 1 == length)
28944 reg = gen_lowpart (QImode, val_reg);
28945 addr = plus_constant (Pmode, dst, i);
28946 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28947 emit_move_insn (mem, reg);
28948 i++;
28951 gcc_assert (i == length);
28952 return true;
28955 /* Set a block of memory using plain strd/str/strh/strb instructions,
28956 to permit unaligned copies on processors which support unaligned
28957 semantics for those instructions. We fill the first LENGTH bytes
28958 of the memory area starting from DSTBASE with byte constant VALUE.
28959 ALIGN is the alignment requirement of memory. */
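/* For example (illustrative): a 14-byte set of a word-aligned buffer on a
   tuning that prefers LDRD/STRD may be emitted as STRD + STR + STRH, plus
   the instructions that build the constant.  */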
28960 static bool
28961 arm_block_set_aligned_non_vect (rtx dstbase,
28962 unsigned HOST_WIDE_INT length,
28963 unsigned HOST_WIDE_INT value,
28964 unsigned HOST_WIDE_INT align)
28966 unsigned int i;
28967 rtx dst, addr, mem;
28968 rtx val_exp, val_reg, reg;
28969 unsigned HOST_WIDE_INT v;
28970 bool use_strd_p;
28972 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28973 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
28975 v = (value | (value << 8) | (value << 16) | (value << 24));
28976 if (length < UNITS_PER_WORD)
28977 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
28979 if (use_strd_p)
28980 v |= (v << BITS_PER_WORD);
28981 else
28982 v = sext_hwi (v, BITS_PER_WORD);
28984 val_exp = GEN_INT (v);
28985 /* Skip if it isn't profitable. */
28986 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28987 align, false, use_strd_p))
28989 if (!use_strd_p)
28990 return false;
28992 /* Try without strd. */
28993 v = (v >> BITS_PER_WORD);
28994 v = sext_hwi (v, BITS_PER_WORD);
28995 val_exp = GEN_INT (v);
28996 use_strd_p = false;
28997 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28998 align, false, use_strd_p))
28999 return false;
29002 i = 0;
29003 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29004 /* Handle double words using strd if possible. */
29005 if (use_strd_p)
29007 val_reg = force_reg (DImode, val_exp);
29008 reg = val_reg;
29009 for (; (i + 8 <= length); i += 8)
29011 addr = plus_constant (Pmode, dst, i);
29012 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29013 emit_move_insn (mem, reg);
29016 else
29017 val_reg = force_reg (SImode, val_exp);
29019 /* Handle words. */
29020 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29021 for (; (i + 4 <= length); i += 4)
29023 addr = plus_constant (Pmode, dst, i);
29024 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29025 if ((align & 3) == 0)
29026 emit_move_insn (mem, reg);
29027 else
29028 emit_insn (gen_unaligned_storesi (mem, reg));
29031 /* Merge last pair of STRH and STRB into a STR if possible. */
29032 if (unaligned_access && i > 0 && (i + 3) == length)
29034 addr = plus_constant (Pmode, dst, i - 1);
29035 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29036 /* We are shifting one byte back, set the alignment accordingly. */
29037 if ((align & 1) == 0)
29038 set_mem_align (mem, BITS_PER_UNIT);
29040 /* Most likely this is an unaligned access, and we can't tell at
29041 compilation time. */
29042 emit_insn (gen_unaligned_storesi (mem, reg));
29043 return true;
29046 /* Handle half word leftover. */
29047 if (i + 2 <= length)
29049 reg = gen_lowpart (HImode, val_reg);
29050 addr = plus_constant (Pmode, dst, i);
29051 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29052 if ((align & 1) == 0)
29053 emit_move_insn (mem, reg);
29054 else
29055 emit_insn (gen_unaligned_storehi (mem, reg));
29057 i += 2;
29060 /* Handle single byte leftover. */
29061 if (i + 1 == length)
29063 reg = gen_lowpart (QImode, val_reg);
29064 addr = plus_constant (Pmode, dst, i);
29065 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29066 emit_move_insn (mem, reg);
29069 return true;
29072 /* Set a block of memory using vectorization instructions for both
29073 aligned and unaligned cases. We fill the first LENGTH bytes of
29074 the memory area starting from DSTBASE with byte constant VALUE.
29075 ALIGN is the alignment requirement of memory. */
29076 static bool
29077 arm_block_set_vect (rtx dstbase,
29078 unsigned HOST_WIDE_INT length,
29079 unsigned HOST_WIDE_INT value,
29080 unsigned HOST_WIDE_INT align)
29082 /* Check whether we need to use unaligned store instruction. */
29083 if (((align & 3) != 0 || (length & 3) != 0)
29084 /* Check whether unaligned store instruction is available. */
29085 && (!unaligned_access || BYTES_BIG_ENDIAN))
29086 return false;
29088 if ((align & 3) == 0)
29089 return arm_block_set_aligned_vect (dstbase, length, value, align);
29090 else
29091 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29094 /* Expand a block-set (memset) operation. First we try to do it with
29095 vector instructions, then fall back to ARM unaligned access and
29096 double-word stores if that is profitable. OPERANDS[0] is the destination,
29097 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29098 initialize the memory with, and OPERANDS[3] is the known alignment of
29099 the destination. */
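/* For example (illustrative): a 32-byte memset of a word-aligned buffer
   with NEON available, on a tuning that sets string_ops_prefer_neon, is
   typically expanded here into a couple of vector stores rather than a
   call to memset.  */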
29100 bool
29101 arm_gen_setmem (rtx *operands)
29103 rtx dstbase = operands[0];
29104 unsigned HOST_WIDE_INT length;
29105 unsigned HOST_WIDE_INT value;
29106 unsigned HOST_WIDE_INT align;
29108 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29109 return false;
29111 length = UINTVAL (operands[1]);
29112 if (length > 64)
29113 return false;
29115 value = (UINTVAL (operands[2]) & 0xFF);
29116 align = UINTVAL (operands[3]);
29117 if (TARGET_NEON && length >= 8
29118 && current_tune->string_ops_prefer_neon
29119 && arm_block_set_vect (dstbase, length, value, align))
29120 return true;
29122 if (!unaligned_access && (align & 3) != 0)
29123 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29125 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29129 static bool
29130 arm_macro_fusion_p (void)
29132 return current_tune->fuseable_ops != ARM_FUSE_NOTHING;
29136 static bool
29137 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29139 rtx set_dest;
29140 rtx prev_set = single_set (prev);
29141 rtx curr_set = single_set (curr);
29143 if (!prev_set
29144 || !curr_set)
29145 return false;
29147 if (any_condjump_p (curr))
29148 return false;
29150 if (!arm_macro_fusion_p ())
29151 return false;
29153 if (current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT)
29155 /* We are trying to fuse
29156 movw imm / movt imm
29157 instructions as a group that gets scheduled together. */
29159 set_dest = SET_DEST (curr_set);
29161 if (GET_MODE (set_dest) != SImode)
29162 return false;
29164 /* We are trying to match:
29165 prev (movw) == (set (reg r0) (const_int imm16))
29166 curr (movt) == (set (zero_extract (reg r0)
29167 (const_int 16)
29168 (const_int 16))
29169 (const_int imm16_1))
29171 prev (movw) == (set (reg r1)
29172 (high (symbol_ref ("SYM"))))
29173 curr (movt) == (set (reg r0)
29174 (lo_sum (reg r1)
29175 (symbol_ref ("SYM")))) */
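/* At the assembly level this corresponds to pairs like
   movw r0, #:lower16:SYM / movt r0, #:upper16:SYM (illustrative);
   keeping the two halves adjacent lets cores that fuse MOVW/MOVT treat
   them as one operation.  */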
29176 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29178 if (CONST_INT_P (SET_SRC (curr_set))
29179 && CONST_INT_P (SET_SRC (prev_set))
29180 && REG_P (XEXP (set_dest, 0))
29181 && REG_P (SET_DEST (prev_set))
29182 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29183 return true;
29185 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29186 && REG_P (SET_DEST (curr_set))
29187 && REG_P (SET_DEST (prev_set))
29188 && GET_CODE (SET_SRC (prev_set)) == HIGH
29189 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29190 return true;
29192 return false;
29195 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29197 static unsigned HOST_WIDE_INT
29198 arm_asan_shadow_offset (void)
29200 return (unsigned HOST_WIDE_INT) 1 << 29;
29204 /* This is a temporary fix for PR60655. Ideally we need
29205 to handle most of these cases in the generic part but
29206 currently we reject minus (..) (sym_ref). We try to
29207 ameliorate the case with minus (sym_ref1) (sym_ref2)
29208 where they are in the same section. */
29210 static bool
29211 arm_const_not_ok_for_debug_p (rtx p)
29213 tree decl_op0 = NULL;
29214 tree decl_op1 = NULL;
29216 if (GET_CODE (p) == MINUS)
29218 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29220 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29221 if (decl_op1
29222 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29223 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29225 if ((TREE_CODE (decl_op1) == VAR_DECL
29226 || TREE_CODE (decl_op1) == CONST_DECL)
29227 && (TREE_CODE (decl_op0) == VAR_DECL
29228 || TREE_CODE (decl_op0) == CONST_DECL))
29229 return (get_variable_section (decl_op1, false)
29230 != get_variable_section (decl_op0, false));
29232 if (TREE_CODE (decl_op1) == LABEL_DECL
29233 && TREE_CODE (decl_op0) == LABEL_DECL)
29234 return (DECL_CONTEXT (decl_op1)
29235 != DECL_CONTEXT (decl_op0));
29238 return true;
29242 return false;
29245 /* Return TRUE if X is a reference to a value in a constant pool. */
29246 extern bool
29247 arm_is_constant_pool_ref (rtx x)
29249 return (MEM_P (x)
29250 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29251 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29254 /* If MEM is in the form of [base+offset], extract the two parts
29255 of the address and store them in BASE and OFFSET; otherwise return
29256 FALSE after clearing BASE and OFFSET. */
29258 static bool
29259 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29261 rtx addr;
29263 gcc_assert (MEM_P (mem));
29265 addr = XEXP (mem, 0);
29267 /* Strip off const from addresses like (const (addr)). */
29268 if (GET_CODE (addr) == CONST)
29269 addr = XEXP (addr, 0);
29271 if (GET_CODE (addr) == REG)
29273 *base = addr;
29274 *offset = const0_rtx;
29275 return true;
29278 if (GET_CODE (addr) == PLUS
29279 && GET_CODE (XEXP (addr, 0)) == REG
29280 && CONST_INT_P (XEXP (addr, 1)))
29282 *base = XEXP (addr, 0);
29283 *offset = XEXP (addr, 1);
29284 return true;
29287 *base = NULL_RTX;
29288 *offset = NULL_RTX;
29290 return false;
29293 /* If INSN is a load or store whose address is in the form [base+offset],
29294 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
29295 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
29296 otherwise return FALSE. */
29298 static bool
29299 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29301 rtx x, dest, src;
29303 gcc_assert (INSN_P (insn));
29304 x = PATTERN (insn);
29305 if (GET_CODE (x) != SET)
29306 return false;
29308 src = SET_SRC (x);
29309 dest = SET_DEST (x);
29310 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29312 *is_load = false;
29313 extract_base_offset_in_addr (dest, base, offset);
29315 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29317 *is_load = true;
29318 extract_base_offset_in_addr (src, base, offset);
29320 else
29321 return false;
29323 return (*base != NULL_RTX && *offset != NULL_RTX);
29326 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29328 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29329 and PRI are only calculated for those instructions. For other instructions,
29330 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29331 of instruction fusion can be supported by returning different priorities.
29333 It's important that irrelevant instructions get the largest FUSION_PRI. */
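/* Illustrative example: two loads such as ldr r1, [r4, #4] and
   ldr r2, [r4, #8] share a base register and have adjacent offsets, so the
   priorities computed below keep them next to each other in the schedule,
   giving a later pass the chance to combine them (e.g. into an LDRD).  */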
29335 static void
29336 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29337 int *fusion_pri, int *pri)
29339 int tmp, off_val;
29340 bool is_load;
29341 rtx base, offset;
29343 gcc_assert (INSN_P (insn));
29345 tmp = max_pri - 1;
29346 if (!fusion_load_store (insn, &base, &offset, &is_load))
29348 *pri = tmp;
29349 *fusion_pri = tmp;
29350 return;
29353 /* Load goes first. */
29354 if (is_load)
29355 *fusion_pri = tmp - 1;
29356 else
29357 *fusion_pri = tmp - 2;
29359 tmp /= 2;
29361 /* INSN with smaller base register goes first. */
29362 tmp -= ((REGNO (base) & 0xff) << 20);
29364 /* INSN with smaller offset goes first. */
29365 off_val = (int)(INTVAL (offset));
29366 if (off_val >= 0)
29367 tmp -= (off_val & 0xfffff);
29368 else
29369 tmp += ((- off_val) & 0xfffff);
29371 *pri = tmp;
29372 return;
29374 #include "gt-arm.h"