[Patch ARM Refactor Builtins 7/8] Use qualifiers arrays when initialising builtins...
[official-gcc.git] / gcc / config / arm / arm.c
blob e37465e98ca1a05430c89361d9d10ef22b1556ae
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
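/* Holder for the (up to four) immediate values computed by
   optimal_immediate_sequence / optimal_immediate_sequence_1 below when
   a constant is built in several instructions.  */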
93 struct four_ints
94 {
95   int i[4];
96 };
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
154 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
155 #endif
156 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
157 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
158 static int arm_comp_type_attributes (const_tree, const_tree);
159 static void arm_set_default_type_attributes (tree);
160 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
161 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
162 static int optimal_immediate_sequence (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence);
165 static int optimal_immediate_sequence_1 (enum rtx_code code,
166 unsigned HOST_WIDE_INT val,
167 struct four_ints *return_sequence,
168 int i);
169 static int arm_get_strip_length (int);
170 static bool arm_function_ok_for_sibcall (tree, tree);
171 static machine_mode arm_promote_function_mode (const_tree,
172 machine_mode, int *,
173 const_tree, int);
174 static bool arm_return_in_memory (const_tree, const_tree);
175 static rtx arm_function_value (const_tree, const_tree, bool);
176 static rtx arm_libcall_value_1 (machine_mode);
177 static rtx arm_libcall_value (machine_mode, const_rtx);
178 static bool arm_function_value_regno_p (const unsigned int);
179 static void arm_internal_label (FILE *, const char *, unsigned long);
180 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
181 tree);
182 static bool arm_have_conditional_execution (void);
183 static bool arm_cannot_force_const_mem (machine_mode, rtx);
184 static bool arm_legitimate_constant_p (machine_mode, rtx);
185 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
186 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
187 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
192 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
193 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
194 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
195 static void emit_constant_insn (rtx cond, rtx pattern);
196 static rtx_insn *emit_set_insn (rtx, rtx);
197 static rtx emit_multi_reg_push (unsigned long, unsigned long);
198 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
199 tree, bool);
200 static rtx arm_function_arg (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
205 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
206 const_tree);
207 static rtx aapcs_libcall_value (machine_mode);
208 static int aapcs_select_return_coproc (const_tree, const_tree);
210 #ifdef OBJECT_FORMAT_ELF
211 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
212 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
213 #endif
214 #ifndef ARM_PE
215 static void arm_encode_section_info (tree, rtx, int);
216 #endif
218 static void arm_file_end (void);
219 static void arm_file_start (void);
221 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static bool arm_pass_by_reference (cumulative_args_t,
224 machine_mode, const_tree, bool);
225 static bool arm_promote_prototypes (const_tree);
226 static bool arm_default_short_enums (void);
227 static bool arm_align_anon_bitfield (void);
228 static bool arm_return_in_msb (const_tree);
229 static bool arm_must_pass_in_stack (machine_mode, const_tree);
230 static bool arm_return_in_memory (const_tree, const_tree);
231 #if ARM_UNWIND_INFO
232 static void arm_unwind_emit (FILE *, rtx_insn *);
233 static bool arm_output_ttype (rtx);
234 static void arm_asm_emit_except_personality (rtx);
235 static void arm_asm_init_sections (void);
236 #endif
237 static rtx arm_dwarf_register_span (rtx);
239 static tree arm_cxx_guard_type (void);
240 static bool arm_cxx_guard_mask_bit (void);
241 static tree arm_get_cookie_size (tree);
242 static bool arm_cookie_has_size (void);
243 static bool arm_cxx_cdtor_returns_this (void);
244 static bool arm_cxx_key_method_may_be_inline (void);
245 static void arm_cxx_determine_class_data_visibility (tree);
246 static bool arm_cxx_class_data_always_comdat (void);
247 static bool arm_cxx_use_aeabi_atexit (void);
248 static void arm_init_libfuncs (void);
249 static tree arm_build_builtin_va_list (void);
250 static void arm_expand_builtin_va_start (tree, rtx);
251 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
252 static void arm_option_override (void);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_cannot_copy_insn_p (rtx_insn *);
255 static int arm_issue_rate (void);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
292 const unsigned char *sel);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 /* Table of machine attributes. */
310 static const struct attribute_spec arm_attribute_table[] =
311 {
312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
313 affects_type_identity } */
314 /* Function calls made to this symbol must be done indirectly, because
315 it may lie outside of the 26 bit addressing range of a normal function
316 call. */
317 { "long_call", 0, 0, false, true, true, NULL, false },
318 /* Whereas these functions are always known to reside within the 26 bit
319 addressing range. */
320 { "short_call", 0, 0, false, true, true, NULL, false },
321 /* Specify the procedure call conventions for a function. */
322 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
323 false },
324 /* Interrupt Service Routines have special prologue and epilogue requirements. */
325 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
326 false },
327 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #ifdef ARM_PE
332 /* ARM/PE has three new attributes:
333 interfacearm - ?
334 dllexport - for exporting a function/variable that will live in a dll
335 dllimport - for importing a function/variable from a dll
337 Microsoft allows multiple declspecs in one __declspec, separating
338 them with spaces. We do NOT support this. Instead, use __declspec
339 multiple times.
340 */
341 { "dllimport", 0, 0, true, false, false, NULL, false },
342 { "dllexport", 0, 0, true, false, false, NULL, false },
343 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
344 false },
345 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
349 false },
350 #endif
351 { NULL, 0, 0, false, false, false, NULL, false }
352 };
354 /* Initialize the GCC target structure. */
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 #undef TARGET_MERGE_DECL_ATTRIBUTES
357 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
358 #endif
360 #undef TARGET_LEGITIMIZE_ADDRESS
361 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
363 #undef TARGET_LRA_P
364 #define TARGET_LRA_P arm_lra_p
366 #undef TARGET_ATTRIBUTE_TABLE
367 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START arm_file_start
371 #undef TARGET_ASM_FILE_END
372 #define TARGET_ASM_FILE_END arm_file_end
374 #undef TARGET_ASM_ALIGNED_SI_OP
375 #define TARGET_ASM_ALIGNED_SI_OP NULL
376 #undef TARGET_ASM_INTEGER
377 #define TARGET_ASM_INTEGER arm_assemble_integer
379 #undef TARGET_PRINT_OPERAND
380 #define TARGET_PRINT_OPERAND arm_print_operand
381 #undef TARGET_PRINT_OPERAND_ADDRESS
382 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
386 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
387 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
389 #undef TARGET_ASM_FUNCTION_PROLOGUE
390 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
392 #undef TARGET_ASM_FUNCTION_EPILOGUE
393 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE arm_option_override
398 #undef TARGET_COMP_TYPE_ATTRIBUTES
399 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
401 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
402 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
407 #undef TARGET_SCHED_REORDER
408 #define TARGET_SCHED_REORDER arm_sched_reorder
410 #undef TARGET_REGISTER_MOVE_COST
411 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
413 #undef TARGET_MEMORY_MOVE_COST
414 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
416 #undef TARGET_ENCODE_SECTION_INFO
417 #ifdef ARM_PE
418 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
419 #else
420 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
421 #endif
423 #undef TARGET_STRIP_NAME_ENCODING
424 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
426 #undef TARGET_ASM_INTERNAL_LABEL
427 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
429 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
430 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
432 #undef TARGET_FUNCTION_VALUE
433 #define TARGET_FUNCTION_VALUE arm_function_value
435 #undef TARGET_LIBCALL_VALUE
436 #define TARGET_LIBCALL_VALUE arm_libcall_value
438 #undef TARGET_FUNCTION_VALUE_REGNO_P
439 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
441 #undef TARGET_ASM_OUTPUT_MI_THUNK
442 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
443 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
444 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
446 #undef TARGET_RTX_COSTS
447 #define TARGET_RTX_COSTS arm_rtx_costs
448 #undef TARGET_ADDRESS_COST
449 #define TARGET_ADDRESS_COST arm_address_cost
451 #undef TARGET_SHIFT_TRUNCATION_MASK
452 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
455 #undef TARGET_ARRAY_MODE_SUPPORTED_P
456 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
457 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
458 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
459 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
460 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
461 arm_autovectorize_vector_sizes
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
466 #undef TARGET_INIT_BUILTINS
467 #define TARGET_INIT_BUILTINS arm_init_builtins
468 #undef TARGET_EXPAND_BUILTIN
469 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL arm_builtin_decl
473 #undef TARGET_INIT_LIBFUNCS
474 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
476 #undef TARGET_PROMOTE_FUNCTION_MODE
477 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
478 #undef TARGET_PROMOTE_PROTOTYPES
479 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
480 #undef TARGET_PASS_BY_REFERENCE
481 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
482 #undef TARGET_ARG_PARTIAL_BYTES
483 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
484 #undef TARGET_FUNCTION_ARG
485 #define TARGET_FUNCTION_ARG arm_function_arg
486 #undef TARGET_FUNCTION_ARG_ADVANCE
487 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
488 #undef TARGET_FUNCTION_ARG_BOUNDARY
489 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
491 #undef TARGET_SETUP_INCOMING_VARARGS
492 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
494 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
495 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
497 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
498 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
499 #undef TARGET_TRAMPOLINE_INIT
500 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
501 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
502 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
504 #undef TARGET_WARN_FUNC_RETURN
505 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
507 #undef TARGET_DEFAULT_SHORT_ENUMS
508 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
510 #undef TARGET_ALIGN_ANON_BITFIELD
511 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
513 #undef TARGET_NARROW_VOLATILE_BITFIELD
514 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
516 #undef TARGET_CXX_GUARD_TYPE
517 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
519 #undef TARGET_CXX_GUARD_MASK_BIT
520 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
522 #undef TARGET_CXX_GET_COOKIE_SIZE
523 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
525 #undef TARGET_CXX_COOKIE_HAS_SIZE
526 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
528 #undef TARGET_CXX_CDTOR_RETURNS_THIS
529 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
531 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
532 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
534 #undef TARGET_CXX_USE_AEABI_ATEXIT
535 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
537 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
538 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
539 arm_cxx_determine_class_data_visibility
541 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
542 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
544 #undef TARGET_RETURN_IN_MSB
545 #define TARGET_RETURN_IN_MSB arm_return_in_msb
547 #undef TARGET_RETURN_IN_MEMORY
548 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
550 #undef TARGET_MUST_PASS_IN_STACK
551 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
553 #if ARM_UNWIND_INFO
554 #undef TARGET_ASM_UNWIND_EMIT
555 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
557 /* EABI unwinding tables use a different format for the typeinfo tables. */
558 #undef TARGET_ASM_TTYPE
559 #define TARGET_ASM_TTYPE arm_output_ttype
561 #undef TARGET_ARM_EABI_UNWINDER
562 #define TARGET_ARM_EABI_UNWINDER true
564 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
565 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
567 #undef TARGET_ASM_INIT_SECTIONS
568 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
569 #endif /* ARM_UNWIND_INFO */
571 #undef TARGET_DWARF_REGISTER_SPAN
572 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
574 #undef TARGET_CANNOT_COPY_INSN_P
575 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
577 #ifdef HAVE_AS_TLS
578 #undef TARGET_HAVE_TLS
579 #define TARGET_HAVE_TLS true
580 #endif
582 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
583 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
588 #undef TARGET_CANNOT_FORCE_CONST_MEM
589 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
591 #undef TARGET_MAX_ANCHOR_OFFSET
592 #define TARGET_MAX_ANCHOR_OFFSET 4095
594 /* The minimum is set such that the total size of the block
595 for a particular anchor is -4088 + 1 + 4095 bytes, which is
596 divisible by eight, ensuring natural spacing of anchors. */
597 #undef TARGET_MIN_ANCHOR_OFFSET
598 #define TARGET_MIN_ANCHOR_OFFSET -4088
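/* Taken together, the two settings let an anchor address offsets in the
   range -4088 .. 4095, i.e. 4095 - (-4088) + 1 = 8184 = 8 * 1023 bytes.  */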
600 #undef TARGET_SCHED_ISSUE_RATE
601 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE arm_mangle_type
606 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
607 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
609 #undef TARGET_BUILD_BUILTIN_VA_LIST
610 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
611 #undef TARGET_EXPAND_BUILTIN_VA_START
612 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
613 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
614 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
616 #ifdef HAVE_AS_TLS
617 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
618 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
619 #endif
621 #undef TARGET_LEGITIMATE_ADDRESS_P
622 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
624 #undef TARGET_PREFERRED_RELOAD_CLASS
625 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
627 #undef TARGET_INVALID_PARAMETER_TYPE
628 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
630 #undef TARGET_INVALID_RETURN_TYPE
631 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
633 #undef TARGET_PROMOTED_TYPE
634 #define TARGET_PROMOTED_TYPE arm_promoted_type
636 #undef TARGET_CONVERT_TO_TYPE
637 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
640 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
642 #undef TARGET_FRAME_POINTER_REQUIRED
643 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE arm_can_eliminate
648 #undef TARGET_CONDITIONAL_REGISTER_USAGE
649 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
651 #undef TARGET_CLASS_LIKELY_SPILLED_P
652 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
654 #undef TARGET_VECTORIZE_BUILTINS
655 #define TARGET_VECTORIZE_BUILTINS
657 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
658 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
659 arm_builtin_vectorized_function
661 #undef TARGET_VECTOR_ALIGNMENT
662 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
664 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
665 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
666 arm_vector_alignment_reachable
668 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
669 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
670 arm_builtin_support_vector_misalignment
672 #undef TARGET_PREFERRED_RENAME_CLASS
673 #define TARGET_PREFERRED_RENAME_CLASS \
674 arm_preferred_rename_class
676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
678 arm_vectorize_vec_perm_const_ok
680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
682 arm_builtin_vectorization_cost
683 #undef TARGET_VECTORIZE_ADD_STMT_COST
684 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
686 #undef TARGET_CANONICALIZE_COMPARISON
687 #define TARGET_CANONICALIZE_COMPARISON \
688 arm_canonicalize_comparison
690 #undef TARGET_ASAN_SHADOW_OFFSET
691 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
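/* Maximum number of conditional instructions that may follow a single IT
   instruction: four normally, but only one under -mrestrict-it, the more
   conservative form expected on ARMv8.  */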
693 #undef MAX_INSN_PER_IT_BLOCK
694 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
696 #undef TARGET_CAN_USE_DOLOOP_P
697 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
699 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
700 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
702 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
703 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
705 #undef TARGET_SCHED_FUSION_PRIORITY
706 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
708 struct gcc_target targetm = TARGET_INITIALIZER;
710 /* Obstack for minipool constant handling. */
711 static struct obstack minipool_obstack;
712 static char * minipool_startobj;
714 /* The maximum number of insns skipped which
715 will be conditionalised if possible. */
716 static int max_insns_skipped = 5;
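/* This default is presumably refined per core via the "Max cond insns"
   entries of the tune_params tables below (StrongARM and XScale, for
   instance, use 3).  */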
718 extern FILE * asm_out_file;
720 /* True if we are currently building a constant table. */
721 int making_const_table;
723 /* The processor for which instructions should be scheduled. */
724 enum processor_type arm_tune = arm_none;
726 /* The current tuning set. */
727 const struct tune_params *current_tune;
729 /* Which floating point hardware to schedule for. */
730 int arm_fpu_attr;
733 /* Which floating point hardware to use. */
733 const struct arm_fpu_desc *arm_fpu_desc;
735 /* Used for Thumb call_via trampolines. */
736 rtx thumb_call_via_label[14];
737 static int thumb_call_reg_needed;
739 /* The bits in this mask specify which
740 instructions we are allowed to generate. */
741 unsigned long insn_flags = 0;
743 /* The bits in this mask specify which instruction scheduling options should
744 be used. */
745 unsigned long tune_flags = 0;
747 /* The highest ARM architecture version supported by the
748 target. */
749 enum base_architecture arm_base_arch = BASE_ARCH_0;
751 /* The following are used in the arm.md file as equivalents to bits
752 in the above two flag variables. */
754 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
755 int arm_arch3m = 0;
757 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
758 int arm_arch4 = 0;
760 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
761 int arm_arch4t = 0;
763 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
764 int arm_arch5 = 0;
766 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
767 int arm_arch5e = 0;
769 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
770 int arm_arch6 = 0;
772 /* Nonzero if this chip supports the ARM 6K extensions. */
773 int arm_arch6k = 0;
775 /* Nonzero if instructions present in ARMv6-M can be used. */
776 int arm_arch6m = 0;
778 /* Nonzero if this chip supports the ARM 7 extensions. */
779 int arm_arch7 = 0;
781 /* Nonzero if instructions not present in the 'M' profile can be used. */
782 int arm_arch_notm = 0;
784 /* Nonzero if instructions present in ARMv7E-M can be used. */
785 int arm_arch7em = 0;
787 /* Nonzero if instructions present in ARMv8 can be used. */
788 int arm_arch8 = 0;
790 /* Nonzero if this chip can benefit from load scheduling. */
791 int arm_ld_sched = 0;
793 /* Nonzero if this chip is a StrongARM. */
794 int arm_tune_strongarm = 0;
796 /* Nonzero if this chip supports Intel Wireless MMX technology. */
797 int arm_arch_iwmmxt = 0;
799 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
800 int arm_arch_iwmmxt2 = 0;
802 /* Nonzero if this chip is an XScale. */
803 int arm_arch_xscale = 0;
805 /* Nonzero if tuning for XScale. */
806 int arm_tune_xscale = 0;
808 /* Nonzero if we want to tune for stores that access the write-buffer.
809 This typically means an ARM6 or ARM7 with MMU or MPU. */
810 int arm_tune_wbuf = 0;
812 /* Nonzero if tuning for Cortex-A9. */
813 int arm_tune_cortex_a9 = 0;
815 /* Nonzero if generating Thumb instructions. */
816 int thumb_code = 0;
818 /* Nonzero if generating Thumb-1 instructions. */
819 int thumb1_code = 0;
821 /* Nonzero if we should define __THUMB_INTERWORK__ in the
822 preprocessor.
823 XXX This is a bit of a hack, it's intended to help work around
824 problems in GLD which doesn't understand that armv5t code is
825 interworking clean. */
826 int arm_cpp_interwork = 0;
828 /* Nonzero if chip supports Thumb 2. */
829 int arm_arch_thumb2;
831 /* Nonzero if chip supports integer division instruction. */
832 int arm_arch_arm_hwdiv;
833 int arm_arch_thumb_hwdiv;
835 /* Nonzero if we should use Neon to handle 64-bit operations rather
836 than core registers. */
837 int prefer_neon_for_64bits = 0;
839 /* Nonzero if we shouldn't use literal pools. */
840 bool arm_disable_literal_pool = false;
842 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
843 we must report the mode of the memory reference from
844 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
845 machine_mode output_memory_reference_mode;
847 /* The register number to be used for the PIC offset register. */
848 unsigned arm_pic_register = INVALID_REGNUM;
850 enum arm_pcs arm_pcs_default;
852 /* For an explanation of these variables, see final_prescan_insn below. */
853 int arm_ccfsm_state;
854 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
855 enum arm_cond_code arm_current_cc;
857 rtx arm_target_insn;
858 int arm_target_label;
859 /* The number of conditionally executed insns, including the current insn. */
860 int arm_condexec_count = 0;
861 /* A bitmask specifying the patterns for the IT block.
862 Zero means do not output an IT block before this insn. */
863 int arm_condexec_mask = 0;
864 /* The number of bits used in arm_condexec_mask. */
865 int arm_condexec_masklen = 0;
867 /* Nonzero if chip supports the ARMv8 CRC instructions. */
868 int arm_arch_crc = 0;
870 /* Nonzero if the core has a very small, high-latency, multiply unit. */
871 int arm_m_profile_small_mul = 0;
873 /* The condition codes of the ARM, and the inverse function. */
874 static const char * const arm_condition_codes[] =
875 {
876 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
877 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
878 };
880 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
881 int arm_regs_in_sequence[] =
882 {
883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
884 };
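/* Unified assembler syntax spells logical shift left "lsl"; the older
   divided syntax used "asl".  */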
886 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
887 #define streq(string1, string2) (strcmp (string1, string2) == 0)
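/* Candidate work (scratch) registers for Thumb-2: roughly the low
   registers r0-r7 minus the Thumb hard frame pointer and, when it falls
   in that range, the PIC register.  SP and PC are masked out as well,
   although they already lie outside the 0xff low-register mask.  */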
889 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
890 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
891 | (1 << PIC_OFFSET_TABLE_REGNUM)))
893 /* Initialization code. */
895 struct processors
896 {
897 const char *const name;
898 enum processor_type core;
899 const char *arch;
900 enum base_architecture base_arch;
901 const unsigned long flags;
902 const struct tune_params *const tune;
903 };
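/* The ARM_PREFETCH_* macros expand to the three prefetch-related entries
   of tune_params: number of prefetch slots, L1 cache size and L1 cache
   line size.  A value of -1 means the figure is unknown, or that
   prefetching is not considered beneficial for the core.  */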
906 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
907 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
908 prefetch_slots, \
909 l1_size, \
910 l1_line_size
912 /* arm generic vectorizer costs. */
913 static const
914 struct cpu_vec_costs arm_default_vec_cost = {
915 1, /* scalar_stmt_cost. */
916 1, /* scalar load_cost. */
917 1, /* scalar_store_cost. */
918 1, /* vec_stmt_cost. */
919 1, /* vec_to_scalar_cost. */
920 1, /* scalar_to_vec_cost. */
921 1, /* vec_align_load_cost. */
922 1, /* vec_unalign_load_cost. */
923 1, /* vec_unalign_store_cost. */
924 1, /* vec_store_cost. */
925 3, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
927 };
929 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
930 #include "aarch-cost-tables.h"
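/* Each cpu_cost_table below gives the per-core "extra" costs, over and
   above the baseline cost of a single instruction, for ALU, multiply,
   load/store, FP and vector operations; a 0 entry therefore means no
   extra penalty for that operation on that core.  */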
934 const struct cpu_cost_table cortexa9_extra_costs =
936 /* ALU */
938 0, /* arith. */
939 0, /* logical. */
940 0, /* shift. */
941 COSTS_N_INSNS (1), /* shift_reg. */
942 COSTS_N_INSNS (1), /* arith_shift. */
943 COSTS_N_INSNS (2), /* arith_shift_reg. */
944 0, /* log_shift. */
945 COSTS_N_INSNS (1), /* log_shift_reg. */
946 COSTS_N_INSNS (1), /* extend. */
947 COSTS_N_INSNS (2), /* extend_arith. */
948 COSTS_N_INSNS (1), /* bfi. */
949 COSTS_N_INSNS (1), /* bfx. */
950 0, /* clz. */
951 0, /* rev. */
952 0, /* non_exec. */
953 true /* non_exec_costs_exec. */
956 /* MULT SImode */
958 COSTS_N_INSNS (3), /* simple. */
959 COSTS_N_INSNS (3), /* flag_setting. */
960 COSTS_N_INSNS (2), /* extend. */
961 COSTS_N_INSNS (3), /* add. */
962 COSTS_N_INSNS (2), /* extend_add. */
963 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
965 /* MULT DImode */
967 0, /* simple (N/A). */
968 0, /* flag_setting (N/A). */
969 COSTS_N_INSNS (4), /* extend. */
970 0, /* add (N/A). */
971 COSTS_N_INSNS (4), /* extend_add. */
972 0 /* idiv (N/A). */
975 /* LD/ST */
977 COSTS_N_INSNS (2), /* load. */
978 COSTS_N_INSNS (2), /* load_sign_extend. */
979 COSTS_N_INSNS (2), /* ldrd. */
980 COSTS_N_INSNS (2), /* ldm_1st. */
981 1, /* ldm_regs_per_insn_1st. */
982 2, /* ldm_regs_per_insn_subsequent. */
983 COSTS_N_INSNS (5), /* loadf. */
984 COSTS_N_INSNS (5), /* loadd. */
985 COSTS_N_INSNS (1), /* load_unaligned. */
986 COSTS_N_INSNS (2), /* store. */
987 COSTS_N_INSNS (2), /* strd. */
988 COSTS_N_INSNS (2), /* stm_1st. */
989 1, /* stm_regs_per_insn_1st. */
990 2, /* stm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (1), /* storef. */
992 COSTS_N_INSNS (1), /* stored. */
993 COSTS_N_INSNS (1) /* store_unaligned. */
996 /* FP SFmode */
998 COSTS_N_INSNS (14), /* div. */
999 COSTS_N_INSNS (4), /* mult. */
1000 COSTS_N_INSNS (7), /* mult_addsub. */
1001 COSTS_N_INSNS (30), /* fma. */
1002 COSTS_N_INSNS (3), /* addsub. */
1003 COSTS_N_INSNS (1), /* fpconst. */
1004 COSTS_N_INSNS (1), /* neg. */
1005 COSTS_N_INSNS (3), /* compare. */
1006 COSTS_N_INSNS (3), /* widen. */
1007 COSTS_N_INSNS (3), /* narrow. */
1008 COSTS_N_INSNS (3), /* toint. */
1009 COSTS_N_INSNS (3), /* fromint. */
1010 COSTS_N_INSNS (3) /* roundint. */
1012 /* FP DFmode */
1014 COSTS_N_INSNS (24), /* div. */
1015 COSTS_N_INSNS (5), /* mult. */
1016 COSTS_N_INSNS (8), /* mult_addsub. */
1017 COSTS_N_INSNS (30), /* fma. */
1018 COSTS_N_INSNS (3), /* addsub. */
1019 COSTS_N_INSNS (1), /* fpconst. */
1020 COSTS_N_INSNS (1), /* neg. */
1021 COSTS_N_INSNS (3), /* compare. */
1022 COSTS_N_INSNS (3), /* widen. */
1023 COSTS_N_INSNS (3), /* narrow. */
1024 COSTS_N_INSNS (3), /* toint. */
1025 COSTS_N_INSNS (3), /* fromint. */
1026 COSTS_N_INSNS (3) /* roundint. */
1029 /* Vector */
1031 COSTS_N_INSNS (1) /* alu. */
1035 const struct cpu_cost_table cortexa8_extra_costs =
1037 /* ALU */
1039 0, /* arith. */
1040 0, /* logical. */
1041 COSTS_N_INSNS (1), /* shift. */
1042 0, /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 0, /* arith_shift_reg. */
1045 COSTS_N_INSNS (1), /* log_shift. */
1046 0, /* log_shift_reg. */
1047 0, /* extend. */
1048 0, /* extend_arith. */
1049 0, /* bfi. */
1050 0, /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1057 /* MULT SImode */
1059 COSTS_N_INSNS (1), /* simple. */
1060 COSTS_N_INSNS (1), /* flag_setting. */
1061 COSTS_N_INSNS (1), /* extend. */
1062 COSTS_N_INSNS (1), /* add. */
1063 COSTS_N_INSNS (1), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1066 /* MULT DImode */
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (2), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (2), /* extend_add. */
1073 0 /* idiv (N/A). */
1076 /* LD/ST */
1078 COSTS_N_INSNS (1), /* load. */
1079 COSTS_N_INSNS (1), /* load_sign_extend. */
1080 COSTS_N_INSNS (1), /* ldrd. */
1081 COSTS_N_INSNS (1), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (1), /* loadf. */
1085 COSTS_N_INSNS (1), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (1), /* store. */
1088 COSTS_N_INSNS (1), /* strd. */
1089 COSTS_N_INSNS (1), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1) /* store_unaligned. */
1097 /* FP SFmode */
1099 COSTS_N_INSNS (36), /* div. */
1100 COSTS_N_INSNS (11), /* mult. */
1101 COSTS_N_INSNS (20), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (9), /* addsub. */
1104 COSTS_N_INSNS (3), /* fpconst. */
1105 COSTS_N_INSNS (3), /* neg. */
1106 COSTS_N_INSNS (6), /* compare. */
1107 COSTS_N_INSNS (4), /* widen. */
1108 COSTS_N_INSNS (4), /* narrow. */
1109 COSTS_N_INSNS (8), /* toint. */
1110 COSTS_N_INSNS (8), /* fromint. */
1111 COSTS_N_INSNS (8) /* roundint. */
1113 /* FP DFmode */
1115 COSTS_N_INSNS (64), /* div. */
1116 COSTS_N_INSNS (16), /* mult. */
1117 COSTS_N_INSNS (25), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (9), /* addsub. */
1120 COSTS_N_INSNS (3), /* fpconst. */
1121 COSTS_N_INSNS (3), /* neg. */
1122 COSTS_N_INSNS (6), /* compare. */
1123 COSTS_N_INSNS (6), /* widen. */
1124 COSTS_N_INSNS (6), /* narrow. */
1125 COSTS_N_INSNS (8), /* toint. */
1126 COSTS_N_INSNS (8), /* fromint. */
1127 COSTS_N_INSNS (8) /* roundint. */
1130 /* Vector */
1132 COSTS_N_INSNS (1) /* alu. */
1136 const struct cpu_cost_table cortexa5_extra_costs =
1138 /* ALU */
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 COSTS_N_INSNS (1), /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 COSTS_N_INSNS (1), /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 COSTS_N_INSNS (1), /* log_shift_reg. */
1148 COSTS_N_INSNS (1), /* extend. */
1149 COSTS_N_INSNS (1), /* extend_arith. */
1150 COSTS_N_INSNS (1), /* bfi. */
1151 COSTS_N_INSNS (1), /* bfx. */
1152 COSTS_N_INSNS (1), /* clz. */
1153 COSTS_N_INSNS (1), /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1159 /* MULT SImode */
1161 0, /* simple. */
1162 COSTS_N_INSNS (1), /* flag_setting. */
1163 COSTS_N_INSNS (1), /* extend. */
1164 COSTS_N_INSNS (1), /* add. */
1165 COSTS_N_INSNS (1), /* extend_add. */
1166 COSTS_N_INSNS (7) /* idiv. */
1168 /* MULT DImode */
1170 0, /* simple (N/A). */
1171 0, /* flag_setting (N/A). */
1172 COSTS_N_INSNS (1), /* extend. */
1173 0, /* add. */
1174 COSTS_N_INSNS (2), /* extend_add. */
1175 0 /* idiv (N/A). */
1178 /* LD/ST */
1180 COSTS_N_INSNS (1), /* load. */
1181 COSTS_N_INSNS (1), /* load_sign_extend. */
1182 COSTS_N_INSNS (6), /* ldrd. */
1183 COSTS_N_INSNS (1), /* ldm_1st. */
1184 1, /* ldm_regs_per_insn_1st. */
1185 2, /* ldm_regs_per_insn_subsequent. */
1186 COSTS_N_INSNS (2), /* loadf. */
1187 COSTS_N_INSNS (4), /* loadd. */
1188 COSTS_N_INSNS (1), /* load_unaligned. */
1189 COSTS_N_INSNS (1), /* store. */
1190 COSTS_N_INSNS (3), /* strd. */
1191 COSTS_N_INSNS (1), /* stm_1st. */
1192 1, /* stm_regs_per_insn_1st. */
1193 2, /* stm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* storef. */
1195 COSTS_N_INSNS (2), /* stored. */
1196 COSTS_N_INSNS (1) /* store_unaligned. */
1199 /* FP SFmode */
1201 COSTS_N_INSNS (15), /* div. */
1202 COSTS_N_INSNS (3), /* mult. */
1203 COSTS_N_INSNS (7), /* mult_addsub. */
1204 COSTS_N_INSNS (7), /* fma. */
1205 COSTS_N_INSNS (3), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (3), /* compare. */
1209 COSTS_N_INSNS (3), /* widen. */
1210 COSTS_N_INSNS (3), /* narrow. */
1211 COSTS_N_INSNS (3), /* toint. */
1212 COSTS_N_INSNS (3), /* fromint. */
1213 COSTS_N_INSNS (3) /* roundint. */
1215 /* FP DFmode */
1217 COSTS_N_INSNS (30), /* div. */
1218 COSTS_N_INSNS (6), /* mult. */
1219 COSTS_N_INSNS (10), /* mult_addsub. */
1220 COSTS_N_INSNS (7), /* fma. */
1221 COSTS_N_INSNS (3), /* addsub. */
1222 COSTS_N_INSNS (3), /* fpconst. */
1223 COSTS_N_INSNS (3), /* neg. */
1224 COSTS_N_INSNS (3), /* compare. */
1225 COSTS_N_INSNS (3), /* widen. */
1226 COSTS_N_INSNS (3), /* narrow. */
1227 COSTS_N_INSNS (3), /* toint. */
1228 COSTS_N_INSNS (3), /* fromint. */
1229 COSTS_N_INSNS (3) /* roundint. */
1232 /* Vector */
1234 COSTS_N_INSNS (1) /* alu. */
1239 const struct cpu_cost_table cortexa7_extra_costs =
1241 /* ALU */
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1262 /* MULT SImode */
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1271 /* MULT DImode */
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1281 /* LD/ST */
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (3), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (2), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1) /* store_unaligned. */
1302 /* FP SFmode */
1304 COSTS_N_INSNS (15), /* div. */
1305 COSTS_N_INSNS (3), /* mult. */
1306 COSTS_N_INSNS (7), /* mult_addsub. */
1307 COSTS_N_INSNS (7), /* fma. */
1308 COSTS_N_INSNS (3), /* addsub. */
1309 COSTS_N_INSNS (3), /* fpconst. */
1310 COSTS_N_INSNS (3), /* neg. */
1311 COSTS_N_INSNS (3), /* compare. */
1312 COSTS_N_INSNS (3), /* widen. */
1313 COSTS_N_INSNS (3), /* narrow. */
1314 COSTS_N_INSNS (3), /* toint. */
1315 COSTS_N_INSNS (3), /* fromint. */
1316 COSTS_N_INSNS (3) /* roundint. */
1318 /* FP DFmode */
1320 COSTS_N_INSNS (30), /* div. */
1321 COSTS_N_INSNS (6), /* mult. */
1322 COSTS_N_INSNS (10), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1335 /* Vector */
1337 COSTS_N_INSNS (1) /* alu. */
1341 const struct cpu_cost_table cortexa12_extra_costs =
1343 /* ALU */
1345 0, /* arith. */
1346 0, /* logical. */
1347 0, /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 0, /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 0, /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1362 /* MULT SImode */
1365 COSTS_N_INSNS (2), /* simple. */
1366 COSTS_N_INSNS (3), /* flag_setting. */
1367 COSTS_N_INSNS (2), /* extend. */
1368 COSTS_N_INSNS (3), /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 COSTS_N_INSNS (18) /* idiv. */
1372 /* MULT DImode */
1374 0, /* simple (N/A). */
1375 0, /* flag_setting (N/A). */
1376 COSTS_N_INSNS (3), /* extend. */
1377 0, /* add (N/A). */
1378 COSTS_N_INSNS (3), /* extend_add. */
1379 0 /* idiv (N/A). */
1382 /* LD/ST */
1384 COSTS_N_INSNS (3), /* load. */
1385 COSTS_N_INSNS (3), /* load_sign_extend. */
1386 COSTS_N_INSNS (3), /* ldrd. */
1387 COSTS_N_INSNS (3), /* ldm_1st. */
1388 1, /* ldm_regs_per_insn_1st. */
1389 2, /* ldm_regs_per_insn_subsequent. */
1390 COSTS_N_INSNS (3), /* loadf. */
1391 COSTS_N_INSNS (3), /* loadd. */
1392 0, /* load_unaligned. */
1393 0, /* store. */
1394 0, /* strd. */
1395 0, /* stm_1st. */
1396 1, /* stm_regs_per_insn_1st. */
1397 2, /* stm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (2), /* storef. */
1399 COSTS_N_INSNS (2), /* stored. */
1400 0 /* store_unaligned. */
1403 /* FP SFmode */
1405 COSTS_N_INSNS (17), /* div. */
1406 COSTS_N_INSNS (4), /* mult. */
1407 COSTS_N_INSNS (8), /* mult_addsub. */
1408 COSTS_N_INSNS (8), /* fma. */
1409 COSTS_N_INSNS (4), /* addsub. */
1410 COSTS_N_INSNS (2), /* fpconst. */
1411 COSTS_N_INSNS (2), /* neg. */
1412 COSTS_N_INSNS (2), /* compare. */
1413 COSTS_N_INSNS (4), /* widen. */
1414 COSTS_N_INSNS (4), /* narrow. */
1415 COSTS_N_INSNS (4), /* toint. */
1416 COSTS_N_INSNS (4), /* fromint. */
1417 COSTS_N_INSNS (4) /* roundint. */
1419 /* FP DFmode */
1421 COSTS_N_INSNS (31), /* div. */
1422 COSTS_N_INSNS (4), /* mult. */
1423 COSTS_N_INSNS (8), /* mult_addsub. */
1424 COSTS_N_INSNS (8), /* fma. */
1425 COSTS_N_INSNS (4), /* addsub. */
1426 COSTS_N_INSNS (2), /* fpconst. */
1427 COSTS_N_INSNS (2), /* neg. */
1428 COSTS_N_INSNS (2), /* compare. */
1429 COSTS_N_INSNS (4), /* widen. */
1430 COSTS_N_INSNS (4), /* narrow. */
1431 COSTS_N_INSNS (4), /* toint. */
1432 COSTS_N_INSNS (4), /* fromint. */
1433 COSTS_N_INSNS (4) /* roundint. */
1436 /* Vector */
1438 COSTS_N_INSNS (1) /* alu. */
1442 const struct cpu_cost_table cortexa15_extra_costs =
1444 /* ALU */
1446 0, /* arith. */
1447 0, /* logical. */
1448 0, /* shift. */
1449 0, /* shift_reg. */
1450 COSTS_N_INSNS (1), /* arith_shift. */
1451 COSTS_N_INSNS (1), /* arith_shift_reg. */
1452 COSTS_N_INSNS (1), /* log_shift. */
1453 COSTS_N_INSNS (1), /* log_shift_reg. */
1454 0, /* extend. */
1455 COSTS_N_INSNS (1), /* extend_arith. */
1456 COSTS_N_INSNS (1), /* bfi. */
1457 0, /* bfx. */
1458 0, /* clz. */
1459 0, /* rev. */
1460 0, /* non_exec. */
1461 true /* non_exec_costs_exec. */
1463 /* MULT SImode */
1466 COSTS_N_INSNS (2), /* simple. */
1467 COSTS_N_INSNS (3), /* flag_setting. */
1468 COSTS_N_INSNS (2), /* extend. */
1469 COSTS_N_INSNS (2), /* add. */
1470 COSTS_N_INSNS (2), /* extend_add. */
1471 COSTS_N_INSNS (18) /* idiv. */
1473 /* MULT DImode */
1475 0, /* simple (N/A). */
1476 0, /* flag_setting (N/A). */
1477 COSTS_N_INSNS (3), /* extend. */
1478 0, /* add (N/A). */
1479 COSTS_N_INSNS (3), /* extend_add. */
1480 0 /* idiv (N/A). */
1483 /* LD/ST */
1485 COSTS_N_INSNS (3), /* load. */
1486 COSTS_N_INSNS (3), /* load_sign_extend. */
1487 COSTS_N_INSNS (3), /* ldrd. */
1488 COSTS_N_INSNS (4), /* ldm_1st. */
1489 1, /* ldm_regs_per_insn_1st. */
1490 2, /* ldm_regs_per_insn_subsequent. */
1491 COSTS_N_INSNS (4), /* loadf. */
1492 COSTS_N_INSNS (4), /* loadd. */
1493 0, /* load_unaligned. */
1494 0, /* store. */
1495 0, /* strd. */
1496 COSTS_N_INSNS (1), /* stm_1st. */
1497 1, /* stm_regs_per_insn_1st. */
1498 2, /* stm_regs_per_insn_subsequent. */
1499 0, /* storef. */
1500 0, /* stored. */
1501 0 /* store_unaligned. */
1504 /* FP SFmode */
1506 COSTS_N_INSNS (17), /* div. */
1507 COSTS_N_INSNS (4), /* mult. */
1508 COSTS_N_INSNS (8), /* mult_addsub. */
1509 COSTS_N_INSNS (8), /* fma. */
1510 COSTS_N_INSNS (4), /* addsub. */
1511 COSTS_N_INSNS (2), /* fpconst. */
1512 COSTS_N_INSNS (2), /* neg. */
1513 COSTS_N_INSNS (5), /* compare. */
1514 COSTS_N_INSNS (4), /* widen. */
1515 COSTS_N_INSNS (4), /* narrow. */
1516 COSTS_N_INSNS (4), /* toint. */
1517 COSTS_N_INSNS (4), /* fromint. */
1518 COSTS_N_INSNS (4) /* roundint. */
1520 /* FP DFmode */
1522 COSTS_N_INSNS (31), /* div. */
1523 COSTS_N_INSNS (4), /* mult. */
1524 COSTS_N_INSNS (8), /* mult_addsub. */
1525 COSTS_N_INSNS (8), /* fma. */
1526 COSTS_N_INSNS (4), /* addsub. */
1527 COSTS_N_INSNS (2), /* fpconst. */
1528 COSTS_N_INSNS (2), /* neg. */
1529 COSTS_N_INSNS (2), /* compare. */
1530 COSTS_N_INSNS (4), /* widen. */
1531 COSTS_N_INSNS (4), /* narrow. */
1532 COSTS_N_INSNS (4), /* toint. */
1533 COSTS_N_INSNS (4), /* fromint. */
1534 COSTS_N_INSNS (4) /* roundint. */
1537 /* Vector */
1539 COSTS_N_INSNS (1) /* alu. */
1543 const struct cpu_cost_table v7m_extra_costs =
1545 /* ALU */
1547 0, /* arith. */
1548 0, /* logical. */
1549 0, /* shift. */
1550 0, /* shift_reg. */
1551 0, /* arith_shift. */
1552 COSTS_N_INSNS (1), /* arith_shift_reg. */
1553 0, /* log_shift. */
1554 COSTS_N_INSNS (1), /* log_shift_reg. */
1555 0, /* extend. */
1556 COSTS_N_INSNS (1), /* extend_arith. */
1557 0, /* bfi. */
1558 0, /* bfx. */
1559 0, /* clz. */
1560 0, /* rev. */
1561 COSTS_N_INSNS (1), /* non_exec. */
1562 false /* non_exec_costs_exec. */
1565 /* MULT SImode */
1567 COSTS_N_INSNS (1), /* simple. */
1568 COSTS_N_INSNS (1), /* flag_setting. */
1569 COSTS_N_INSNS (2), /* extend. */
1570 COSTS_N_INSNS (1), /* add. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (8) /* idiv. */
1574 /* MULT DImode */
1576 0, /* simple (N/A). */
1577 0, /* flag_setting (N/A). */
1578 COSTS_N_INSNS (2), /* extend. */
1579 0, /* add (N/A). */
1580 COSTS_N_INSNS (3), /* extend_add. */
1581 0 /* idiv (N/A). */
1584 /* LD/ST */
1586 COSTS_N_INSNS (2), /* load. */
1587 0, /* load_sign_extend. */
1588 COSTS_N_INSNS (3), /* ldrd. */
1589 COSTS_N_INSNS (2), /* ldm_1st. */
1590 1, /* ldm_regs_per_insn_1st. */
1591 1, /* ldm_regs_per_insn_subsequent. */
1592 COSTS_N_INSNS (2), /* loadf. */
1593 COSTS_N_INSNS (3), /* loadd. */
1594 COSTS_N_INSNS (1), /* load_unaligned. */
1595 COSTS_N_INSNS (2), /* store. */
1596 COSTS_N_INSNS (3), /* strd. */
1597 COSTS_N_INSNS (2), /* stm_1st. */
1598 1, /* stm_regs_per_insn_1st. */
1599 1, /* stm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* storef. */
1601 COSTS_N_INSNS (3), /* stored. */
1602 COSTS_N_INSNS (1) /* store_unaligned. */
1605 /* FP SFmode */
1607 COSTS_N_INSNS (7), /* div. */
1608 COSTS_N_INSNS (2), /* mult. */
1609 COSTS_N_INSNS (5), /* mult_addsub. */
1610 COSTS_N_INSNS (3), /* fma. */
1611 COSTS_N_INSNS (1), /* addsub. */
1612 0, /* fpconst. */
1613 0, /* neg. */
1614 0, /* compare. */
1615 0, /* widen. */
1616 0, /* narrow. */
1617 0, /* toint. */
1618 0, /* fromint. */
1619 0 /* roundint. */
1621 /* FP DFmode */
1623 COSTS_N_INSNS (15), /* div. */
1624 COSTS_N_INSNS (5), /* mult. */
1625 COSTS_N_INSNS (7), /* mult_addsub. */
1626 COSTS_N_INSNS (7), /* fma. */
1627 COSTS_N_INSNS (3), /* addsub. */
1628 0, /* fpconst. */
1629 0, /* neg. */
1630 0, /* compare. */
1631 0, /* widen. */
1632 0, /* narrow. */
1633 0, /* toint. */
1634 0, /* fromint. */
1635 0 /* roundint. */
1638 /* Vector */
1640 COSTS_N_INSNS (1) /* alu. */
1644 const struct tune_params arm_slowmul_tune =
1646 arm_slowmul_rtx_costs,
1647 NULL,
1648 NULL, /* Sched adj cost. */
1649 3, /* Constant limit. */
1650 5, /* Max cond insns. */
1651 ARM_PREFETCH_NOT_BENEFICIAL,
1652 true, /* Prefer constant pool. */
1653 arm_default_branch_cost,
1654 false, /* Prefer LDRD/STRD. */
1655 {true, true}, /* Prefer non short circuit. */
1656 &arm_default_vec_cost, /* Vectorizer costs. */
1657 false, /* Prefer Neon for 64-bit bitops. */
1658 false, false, /* Prefer 32-bit encodings. */
1659 false, /* Prefer Neon for stringops. */
1660 8 /* Maximum insns to inline memset. */
1663 const struct tune_params arm_fastmul_tune =
1665 arm_fastmul_rtx_costs,
1666 NULL,
1667 NULL, /* Sched adj cost. */
1668 1, /* Constant limit. */
1669 5, /* Max cond insns. */
1670 ARM_PREFETCH_NOT_BENEFICIAL,
1671 true, /* Prefer constant pool. */
1672 arm_default_branch_cost,
1673 false, /* Prefer LDRD/STRD. */
1674 {true, true}, /* Prefer non short circuit. */
1675 &arm_default_vec_cost, /* Vectorizer costs. */
1676 false, /* Prefer Neon for 64-bit bitops. */
1677 false, false, /* Prefer 32-bit encodings. */
1678 false, /* Prefer Neon for stringops. */
1679 8 /* Maximum insns to inline memset. */
1682 /* StrongARM has early execution of branches, so a sequence that is worth
1683 skipping is shorter. Set max_insns_skipped to a lower value. */
1685 const struct tune_params arm_strongarm_tune =
1687 arm_fastmul_rtx_costs,
1688 NULL,
1689 NULL, /* Sched adj cost. */
1690 1, /* Constant limit. */
1691 3, /* Max cond insns. */
1692 ARM_PREFETCH_NOT_BENEFICIAL,
1693 true, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 false, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false, /* Prefer Neon for 64-bit bitops. */
1699 false, false, /* Prefer 32-bit encodings. */
1700 false, /* Prefer Neon for stringops. */
1701 8 /* Maximum insns to inline memset. */
1704 const struct tune_params arm_xscale_tune =
1706 arm_xscale_rtx_costs,
1707 NULL,
1708 xscale_sched_adjust_cost,
1709 2, /* Constant limit. */
1710 3, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false, /* Prefer 32-bit encodings. */
1719 false, /* Prefer Neon for stringops. */
1720 8 /* Maximum insns to inline memset. */
1723 const struct tune_params arm_9e_tune =
1725 arm_9e_rtx_costs,
1726 NULL,
1727 NULL, /* Sched adj cost. */
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false, /* Prefer Neon for 64-bits bitops. */
1737 false, false, /* Prefer 32-bit encodings. */
1738 false, /* Prefer Neon for stringops. */
1739 8 /* Maximum insns to inline memset. */
1742 const struct tune_params arm_v6t2_tune =
1744 arm_9e_rtx_costs,
1745 NULL,
1746 NULL, /* Sched adj cost. */
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 false, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false, /* Prefer Neon for 64-bits bitops. */
1756 false, false, /* Prefer 32-bit encodings. */
1757 false, /* Prefer Neon for stringops. */
1758 8 /* Maximum insns to inline memset. */
1761 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1762 const struct tune_params arm_cortex_tune =
1764 arm_9e_rtx_costs,
1765 &generic_extra_costs,
1766 NULL, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1781 const struct tune_params arm_cortex_a8_tune =
1783 arm_9e_rtx_costs,
1784 &cortexa8_extra_costs,
1785 NULL, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1800 const struct tune_params arm_cortex_a7_tune =
1802 arm_9e_rtx_costs,
1803 &cortexa7_extra_costs,
1804 NULL,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1819 const struct tune_params arm_cortex_a15_tune =
1821 arm_9e_rtx_costs,
1822 &cortexa15_extra_costs,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1838 const struct tune_params arm_cortex_a53_tune =
1840 arm_9e_rtx_costs,
1841 &cortexa53_extra_costs,
1842 NULL, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1857 const struct tune_params arm_cortex_a57_tune =
1859 arm_9e_rtx_costs,
1860 &cortexa57_extra_costs,
1861 NULL, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
1876 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1877 less appealing. Set max_insns_skipped to a low value. */
1879 const struct tune_params arm_cortex_a5_tune =
1881 arm_9e_rtx_costs,
1882 &cortexa5_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 1, /* Constant limit. */
1885 1, /* Max cond insns. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 false, /* Prefer constant pool. */
1888 arm_cortex_a5_branch_cost,
1889 false, /* Prefer LDRD/STRD. */
1890 {false, false}, /* Prefer non short circuit. */
1891 &arm_default_vec_cost, /* Vectorizer costs. */
1892 false, /* Prefer Neon for 64-bits bitops. */
1893 false, false, /* Prefer 32-bit encodings. */
1894 true, /* Prefer Neon for stringops. */
1895 8 /* Maximum insns to inline memset. */
1898 const struct tune_params arm_cortex_a9_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa9_extra_costs,
1902 cortex_a9_sched_adjust_cost,
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_BENEFICIAL(4,32,32),
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 false, /* Prefer Neon for stringops. */
1914 8 /* Maximum insns to inline memset. */
1917 const struct tune_params arm_cortex_a12_tune =
1919 arm_9e_rtx_costs,
1920 &cortexa12_extra_costs,
1921 NULL,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 ARM_PREFETCH_BENEFICIAL(4,32,32),
1925 false, /* Prefer constant pool. */
1926 arm_default_branch_cost,
1927 true, /* Prefer LDRD/STRD. */
1928 {true, true}, /* Prefer non short circuit. */
1929 &arm_default_vec_cost, /* Vectorizer costs. */
1930 false, /* Prefer Neon for 64-bits bitops. */
1931 false, false, /* Prefer 32-bit encodings. */
1932 true, /* Prefer Neon for stringops. */
1933 8 /* Maximum insns to inline memset. */
1936 /* ARMv7-M tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a single
1937 cycle to execute, so two cycles for the pair. An LDR from the constant pool likewise
1938 takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1939 loads/stores can be pipelined together, saving one cycle), and may also
1940 improve icache utilisation. Hence we prefer the constant pool for such
1941 processors. */
1943 const struct tune_params arm_v7m_tune =
1945 arm_9e_rtx_costs,
1946 &v7m_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 1, /* Constant limit. */
1949 2, /* Max cond insns. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 true, /* Prefer constant pool. */
1952 arm_cortex_m_branch_cost,
1953 false, /* Prefer LDRD/STRD. */
1954 {false, false}, /* Prefer non short circuit. */
1955 &arm_default_vec_cost, /* Vectorizer costs. */
1956 false, /* Prefer Neon for 64-bits bitops. */
1957 false, false, /* Prefer 32-bit encodings. */
1958 false, /* Prefer Neon for stringops. */
1959 8 /* Maximum insns to inline memset. */
1962 /* Cortex-M7 tuning. */
1964 const struct tune_params arm_cortex_m7_tune =
1966 arm_9e_rtx_costs,
1967 &v7m_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 0, /* Constant limit. */
1970 0, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 true, /* Prefer constant pool. */
1973 arm_cortex_m_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {true, true}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 false, /* Prefer Neon for stringops. */
1980 8 /* Maximum insns to inline memset. */
1983 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1984 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1985 const struct tune_params arm_v6m_tune =
1987 arm_9e_rtx_costs,
1988 NULL,
1989 NULL, /* Sched adj cost. */
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {false, false}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8 /* Maximum insns to inline memset. */
2004 const struct tune_params arm_fa726te_tune =
2006 arm_9e_rtx_costs,
2007 NULL,
2008 fa726te_sched_adjust_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 true, /* Prefer constant pool. */
2013 arm_default_branch_cost,
2014 false, /* Prefer LDRD/STRD. */
2015 {true, true}, /* Prefer non short circuit. */
2016 &arm_default_vec_cost, /* Vectorizer costs. */
2017 false, /* Prefer Neon for 64-bits bitops. */
2018 false, false, /* Prefer 32-bit encodings. */
2019 false, /* Prefer Neon for stringops. */
2020 8 /* Maximum insns to inline memset. */
2024 /* Not all of these give usefully different compilation alternatives,
2025 but there is no simple way of generalizing them. */
2026 static const struct processors all_cores[] =
2028 /* ARM Cores */
2029 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2030 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2031 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
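/* A hypothetical arm-cores.def entry ARM_CORE ("foo", foo, foo, 7A, FL_LDSCHED, cortex)
   would expand to {"foo", foo, "7A", BASE_ARCH_7A,
   FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_tune}.  */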
2032 #include "arm-cores.def"
2033 #undef ARM_CORE
2034 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2037 static const struct processors all_architectures[] =
2039 /* ARM Architectures */
2040 /* We don't specify tuning costs here as they will be figured out
2041 from the core. */
2043 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2044 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2045 #include "arm-arches.def"
2046 #undef ARM_ARCH
2047 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2051 /* These are populated as command-line arguments are processed, or NULL
2052 if not specified. */
2053 static const struct processors *arm_selected_arch;
2054 static const struct processors *arm_selected_cpu;
2055 static const struct processors *arm_selected_tune;
2057 /* The name of the preprocessor macro to define for this architecture. */
2059 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2061 /* Available values for -mfpu=. */
2063 static const struct arm_fpu_desc all_fpus[] =
2065 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2066 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2067 #include "arm-fpus.def"
2068 #undef ARM_FPU
2072 /* Supported TLS relocations. */
2074 enum tls_reloc {
2075 TLS_GD32,
2076 TLS_LDM32,
2077 TLS_LDO32,
2078 TLS_IE32,
2079 TLS_LE32,
2080 TLS_DESCSEQ /* GNU scheme */
2083 /* The maximum number of insns to be used when loading a constant. */
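/* That is, one insn when optimizing for size, otherwise the limit taken from the
   current tuning (e.g. 3 for arm_slowmul_tune).  */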
2084 inline static int
2085 arm_constant_limit (bool size_p)
2087 return size_p ? 1 : current_tune->constant_limit;
2090 /* Emit an insn that's a simple single-set. Both the operands must be known
2091 to be valid. */
2092 inline static rtx_insn *
2093 emit_set_insn (rtx x, rtx y)
2095 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2098 /* Return the number of bits set in VALUE. */
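/* Each loop iteration below clears the least-significant set bit, so the loop
   runs once per set bit: e.g. 0x29 -> 0x28 -> 0x20 -> 0, giving 3.  */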
2099 static unsigned
2100 bit_count (unsigned long value)
2102 unsigned long count = 0;
2104 while (value)
2106 count++;
2107 value &= value - 1; /* Clear the least-significant set bit. */
2110 return count;
2113 typedef struct
2115 machine_mode mode;
2116 const char *name;
2117 } arm_fixed_mode_set;
2119 /* A small helper for setting fixed-point library libfuncs. */
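/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers "__gnu_addqq3" as the QQmode addition libcall.  */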
2121 static void
2122 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2123 const char *funcname, const char *modename,
2124 int num_suffix)
2126 char buffer[50];
2128 if (num_suffix == 0)
2129 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2130 else
2131 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2133 set_optab_libfunc (optable, mode, buffer);
2136 static void
2137 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2138 machine_mode from, const char *funcname,
2139 const char *toname, const char *fromname)
2141 char buffer[50];
2142 const char *maybe_suffix_2 = "";
2144 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2145 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2146 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2147 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2148 maybe_suffix_2 = "2";
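/* For example, QQmode -> HQmode gives "__gnu_fractqqhq2", while SFmode -> SQmode
   gives "__gnu_fractsfsq" (no "2" suffix, since the source is not a fixed-point mode).  */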
2150 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2151 maybe_suffix_2);
2153 set_conv_libfunc (optable, to, from, buffer);
2156 /* Set up library functions unique to ARM. */
2158 static void
2159 arm_init_libfuncs (void)
2161 /* For Linux, we have access to kernel support for atomic operations. */
2162 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2163 init_sync_libfuncs (2 * UNITS_PER_WORD);
2165 /* There are no special library functions unless we are using the
2166 ARM BPABI. */
2167 if (!TARGET_BPABI)
2168 return;
2170 /* The functions below are described in Section 4 of the "Run-Time
2171 ABI for the ARM architecture", Version 1.0. */
2173 /* Double-precision floating-point arithmetic. Table 2. */
2174 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2175 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2176 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2177 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2178 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2180 /* Double-precision comparisons. Table 3. */
2181 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2182 set_optab_libfunc (ne_optab, DFmode, NULL);
2183 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2184 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2185 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2186 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2187 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2189 /* Single-precision floating-point arithmetic. Table 4. */
2190 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2191 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2192 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2193 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2194 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2196 /* Single-precision comparisons. Table 5. */
2197 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2198 set_optab_libfunc (ne_optab, SFmode, NULL);
2199 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2200 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2201 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2202 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2203 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2205 /* Floating-point to integer conversions. Table 6. */
2206 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2207 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2208 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2209 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2210 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2211 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2213 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2215 /* Conversions between floating types. Table 7. */
2216 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2217 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2219 /* Integer to floating-point conversions. Table 8. */
2220 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2222 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2223 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2224 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2225 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2226 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2227 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2229 /* Long long. Table 9. */
2230 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2231 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2232 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2233 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2234 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2235 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2236 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2237 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2239 /* Integer (32/32->32) division. \S 4.3.1. */
2240 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2241 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2243 /* The divmod functions are designed so that they can be used for
2244 plain division, even though they return both the quotient and the
2245 remainder. The quotient is returned in the usual location (i.e.,
2246 r0 for SImode, {r0, r1} for DImode), just as would be expected
2247 for an ordinary division routine. Because the AAPCS calling
2248 conventions specify that all of { r0, r1, r2, r3 } are
2249 call-clobbered registers, there is no need to tell the compiler
2250 explicitly that those registers are clobbered by these
2251 routines. */
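/* Concretely, __aeabi_idivmod/__aeabi_uidivmod return the remainder in r1, and
   __aeabi_ldivmod/__aeabi_uldivmod return it in {r2, r3}; the compiler already
   assumes those registers are clobbered across calls.  */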
2252 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2253 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2255 /* For SImode division the ABI provides div-without-mod routines,
2256 which are faster. */
2257 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2258 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2260 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2261 divmod libcalls instead. */
2262 set_optab_libfunc (smod_optab, DImode, NULL);
2263 set_optab_libfunc (umod_optab, DImode, NULL);
2264 set_optab_libfunc (smod_optab, SImode, NULL);
2265 set_optab_libfunc (umod_optab, SImode, NULL);
2267 /* Half-precision float operations. The compiler handles all operations
2268 with NULL libfuncs by converting to SFmode. */
2269 switch (arm_fp16_format)
2271 case ARM_FP16_FORMAT_IEEE:
2272 case ARM_FP16_FORMAT_ALTERNATIVE:
2274 /* Conversions. */
2275 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2276 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2277 ? "__gnu_f2h_ieee"
2278 : "__gnu_f2h_alternative"));
2279 set_conv_libfunc (sext_optab, SFmode, HFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_h2f_ieee"
2282 : "__gnu_h2f_alternative"));
2284 /* Arithmetic. */
2285 set_optab_libfunc (add_optab, HFmode, NULL);
2286 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2287 set_optab_libfunc (smul_optab, HFmode, NULL);
2288 set_optab_libfunc (neg_optab, HFmode, NULL);
2289 set_optab_libfunc (sub_optab, HFmode, NULL);
2291 /* Comparisons. */
2292 set_optab_libfunc (eq_optab, HFmode, NULL);
2293 set_optab_libfunc (ne_optab, HFmode, NULL);
2294 set_optab_libfunc (lt_optab, HFmode, NULL);
2295 set_optab_libfunc (le_optab, HFmode, NULL);
2296 set_optab_libfunc (ge_optab, HFmode, NULL);
2297 set_optab_libfunc (gt_optab, HFmode, NULL);
2298 set_optab_libfunc (unord_optab, HFmode, NULL);
2299 break;
2301 default:
2302 break;
2305 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2307 const arm_fixed_mode_set fixed_arith_modes[] =
2309 { QQmode, "qq" },
2310 { UQQmode, "uqq" },
2311 { HQmode, "hq" },
2312 { UHQmode, "uhq" },
2313 { SQmode, "sq" },
2314 { USQmode, "usq" },
2315 { DQmode, "dq" },
2316 { UDQmode, "udq" },
2317 { TQmode, "tq" },
2318 { UTQmode, "utq" },
2319 { HAmode, "ha" },
2320 { UHAmode, "uha" },
2321 { SAmode, "sa" },
2322 { USAmode, "usa" },
2323 { DAmode, "da" },
2324 { UDAmode, "uda" },
2325 { TAmode, "ta" },
2326 { UTAmode, "uta" }
2328 const arm_fixed_mode_set fixed_conv_modes[] =
2330 { QQmode, "qq" },
2331 { UQQmode, "uqq" },
2332 { HQmode, "hq" },
2333 { UHQmode, "uhq" },
2334 { SQmode, "sq" },
2335 { USQmode, "usq" },
2336 { DQmode, "dq" },
2337 { UDQmode, "udq" },
2338 { TQmode, "tq" },
2339 { UTQmode, "utq" },
2340 { HAmode, "ha" },
2341 { UHAmode, "uha" },
2342 { SAmode, "sa" },
2343 { USAmode, "usa" },
2344 { DAmode, "da" },
2345 { UDAmode, "uda" },
2346 { TAmode, "ta" },
2347 { UTAmode, "uta" },
2348 { QImode, "qi" },
2349 { HImode, "hi" },
2350 { SImode, "si" },
2351 { DImode, "di" },
2352 { TImode, "ti" },
2353 { SFmode, "sf" },
2354 { DFmode, "df" }
2356 unsigned int i, j;
2358 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2360 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2361 "add", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2363 "ssadd", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2365 "usadd", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2367 "sub", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2369 "sssub", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2371 "ussub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2373 "mul", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2375 "ssmul", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2377 "usmul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2379 "div", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2381 "udiv", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2383 "ssdiv", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2385 "usdiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2387 "neg", fixed_arith_modes[i].name, 2);
2388 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2389 "ssneg", fixed_arith_modes[i].name, 2);
2390 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2391 "usneg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2393 "ashl", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2395 "ashr", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2397 "lshr", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2399 "ssashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2401 "usashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2403 "cmp", fixed_arith_modes[i].name, 2);
2406 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2407 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2409 if (i == j
2410 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2411 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2412 continue;
2414 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2415 fixed_conv_modes[j].mode, "fract",
2416 fixed_conv_modes[i].name,
2417 fixed_conv_modes[j].name);
2418 arm_set_fixed_conv_libfunc (satfract_optab,
2419 fixed_conv_modes[i].mode,
2420 fixed_conv_modes[j].mode, "satfract",
2421 fixed_conv_modes[i].name,
2422 fixed_conv_modes[j].name);
2423 arm_set_fixed_conv_libfunc (fractuns_optab,
2424 fixed_conv_modes[i].mode,
2425 fixed_conv_modes[j].mode, "fractuns",
2426 fixed_conv_modes[i].name,
2427 fixed_conv_modes[j].name);
2428 arm_set_fixed_conv_libfunc (satfractuns_optab,
2429 fixed_conv_modes[i].mode,
2430 fixed_conv_modes[j].mode, "satfractuns",
2431 fixed_conv_modes[i].name,
2432 fixed_conv_modes[j].name);
2436 if (TARGET_AAPCS_BASED)
2437 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2440 /* On AAPCS systems, this is the "struct __va_list". */
2441 static GTY(()) tree va_list_type;
2443 /* Return the type to use as __builtin_va_list. */
2444 static tree
2445 arm_build_builtin_va_list (void)
2447 tree va_list_name;
2448 tree ap_field;
2450 if (!TARGET_AAPCS_BASED)
2451 return std_build_builtin_va_list ();
2453 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2454 defined as:
2456 struct __va_list
2458 void *__ap;
2461 The C Library ABI further reinforces this definition in \S
2462 4.1.
2464 We must follow this definition exactly. The structure tag
2465 name is visible in C++ mangled names, and thus forms a part
2466 of the ABI. The field name may be used by people who
2467 #include <stdarg.h>. */
2468 /* Create the type. */
2469 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2470 /* Give it the required name. */
2471 va_list_name = build_decl (BUILTINS_LOCATION,
2472 TYPE_DECL,
2473 get_identifier ("__va_list"),
2474 va_list_type);
2475 DECL_ARTIFICIAL (va_list_name) = 1;
2476 TYPE_NAME (va_list_type) = va_list_name;
2477 TYPE_STUB_DECL (va_list_type) = va_list_name;
2478 /* Create the __ap field. */
2479 ap_field = build_decl (BUILTINS_LOCATION,
2480 FIELD_DECL,
2481 get_identifier ("__ap"),
2482 ptr_type_node);
2483 DECL_ARTIFICIAL (ap_field) = 1;
2484 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2485 TYPE_FIELDS (va_list_type) = ap_field;
2486 /* Compute its layout. */
2487 layout_type (va_list_type);
2489 return va_list_type;
2492 /* Return an expression of type "void *" pointing to the next
2493 available argument in a variable-argument list. VALIST is the
2494 user-level va_list object, of type __builtin_va_list. */
2495 static tree
2496 arm_extract_valist_ptr (tree valist)
2498 if (TREE_TYPE (valist) == error_mark_node)
2499 return error_mark_node;
2501 /* On an AAPCS target, the pointer is stored within "struct
2502 va_list". */
2503 if (TARGET_AAPCS_BASED)
2505 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2506 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2507 valist, ap_field, NULL_TREE);
2510 return valist;
2513 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2514 static void
2515 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2517 valist = arm_extract_valist_ptr (valist);
2518 std_expand_builtin_va_start (valist, nextarg);
2521 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2522 static tree
2523 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2524 gimple_seq *post_p)
2526 valist = arm_extract_valist_ptr (valist);
2527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2530 /* Fix up any incompatible options that the user has specified. */
2531 static void
2532 arm_option_override (void)
2534 if (global_options_set.x_arm_arch_option)
2535 arm_selected_arch = &all_architectures[arm_arch_option];
2537 if (global_options_set.x_arm_cpu_option)
2539 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2540 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2543 if (global_options_set.x_arm_tune_option)
2544 arm_selected_tune = &all_cores[(int) arm_tune_option];
2546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2547 SUBTARGET_OVERRIDE_OPTIONS;
2548 #endif
2550 if (arm_selected_arch)
2552 if (arm_selected_cpu)
2554 /* Check for conflict between mcpu and march. */
2555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2558 arm_selected_cpu->name, arm_selected_arch->name);
2559 /* -march wins for code generation.
2560 -mcpu wins for default tuning. */
2561 if (!arm_selected_tune)
2562 arm_selected_tune = arm_selected_cpu;
2564 arm_selected_cpu = arm_selected_arch;
2566 else
2567 /* -mcpu wins. */
2568 arm_selected_arch = NULL;
2570 else
2571 /* Pick a CPU based on the architecture. */
2572 arm_selected_cpu = arm_selected_arch;
2575 /* If the user did not specify a processor, choose one for them. */
2576 if (!arm_selected_cpu)
2578 const struct processors * sel;
2579 unsigned int sought;
2581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2582 if (!arm_selected_cpu->name)
2584 #ifdef SUBTARGET_CPU_DEFAULT
2585 /* Use the subtarget default CPU if none was specified by
2586 configure. */
2587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2588 #endif
2589 /* Default to ARM6. */
2590 if (!arm_selected_cpu->name)
2591 arm_selected_cpu = &all_cores[arm6];
2594 sel = arm_selected_cpu;
2595 insn_flags = sel->flags;
2597 /* Now check to see if the user has specified any command-line
2598 switches that require certain abilities from the CPU. */
2599 sought = 0;
2601 if (TARGET_INTERWORK || TARGET_THUMB)
2603 sought |= (FL_THUMB | FL_MODE32);
2605 /* There are no ARM processors that support both APCS-26 and
2606 interworking. Therefore we force FL_MODE26 to be removed
2607 from insn_flags here (if it was set), so that the search
2608 below will always be able to find a compatible processor. */
2609 insn_flags &= ~FL_MODE26;
2612 if (sought != 0 && ((sought & insn_flags) != sought))
2614 /* Try to locate a CPU type that supports all of the abilities
2615 of the default CPU, plus the extra abilities requested by
2616 the user. */
2617 for (sel = all_cores; sel->name != NULL; sel++)
2618 if ((sel->flags & sought) == (sought | insn_flags))
2619 break;
2621 if (sel->name == NULL)
2623 unsigned current_bit_count = 0;
2624 const struct processors * best_fit = NULL;
2626 /* Ideally we would like to issue an error message here
2627 saying that it was not possible to find a CPU compatible
2628 with the default CPU, but which also supports the command
2629 line options specified by the programmer, and so they
2630 ought to use the -mcpu=<name> command line option to
2631 override the default CPU type.
2633 If we cannot find a cpu that has both the
2634 characteristics of the default cpu and the given
2635 command line options we scan the array again looking
2636 for a best match. */
2637 for (sel = all_cores; sel->name != NULL; sel++)
2638 if ((sel->flags & sought) == sought)
2640 unsigned count;
2642 count = bit_count (sel->flags & insn_flags);
2644 if (count >= current_bit_count)
2646 best_fit = sel;
2647 current_bit_count = count;
2651 gcc_assert (best_fit);
2652 sel = best_fit;
2655 arm_selected_cpu = sel;
2659 gcc_assert (arm_selected_cpu);
2660 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
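/* For an ARMv7-A target, for example, the selected architecture string is "7A",
   so this produces "__ARM_ARCH_7A__".  */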
2664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2665 insn_flags = arm_selected_cpu->flags;
2666 arm_base_arch = arm_selected_cpu->base_arch;
2668 arm_tune = arm_selected_tune->core;
2669 tune_flags = arm_selected_tune->flags;
2670 current_tune = arm_selected_tune->tune;
2672 /* Make sure that the processor choice does not conflict with any of the
2673 other command line choices. */
2674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2675 error ("target CPU does not support ARM mode");
2677 /* BPABI targets use linker tricks to allow interworking on cores
2678 without thumb support. */
2679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2681 warning (0, "target CPU does not support interworking" );
2682 target_flags &= ~MASK_INTERWORK;
2685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2687 warning (0, "target CPU does not support THUMB instructions");
2688 target_flags &= ~MASK_THUMB;
2691 if (TARGET_APCS_FRAME && TARGET_THUMB)
2693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2694 target_flags &= ~MASK_APCS_FRAME;
2697 /* Callee super interworking implies thumb interworking. Adding
2698 this to the flags here simplifies the logic elsewhere. */
2699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2700 target_flags |= MASK_INTERWORK;
2702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if called
2703 from here, where no function is currently being compiled. */
2704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2713 target_flags |= MASK_APCS_FRAME;
2716 if (TARGET_POKE_FUNCTION_NAME)
2717 target_flags |= MASK_APCS_FRAME;
2719 if (TARGET_APCS_REENT && flag_pic)
2720 error ("-fpic and -mapcs-reent are incompatible");
2722 if (TARGET_APCS_REENT)
2723 warning (0, "APCS reentrant code not supported. Ignored");
2725 /* If this target is normally configured to use APCS frames, warn if they
2726 are turned off and debugging is turned on. */
2727 if (TARGET_ARM
2728 && write_symbols != NO_DEBUG
2729 && !TARGET_APCS_FRAME
2730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2733 if (TARGET_APCS_FLOAT)
2734 warning (0, "passing floating point arguments in fp regs not yet supported");
2736 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2737 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2738 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2739 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2740 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2741 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2742 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2743 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2744 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2745 arm_arch6m = arm_arch6 && !arm_arch_notm;
2746 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2747 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2748 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2749 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2750 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2752 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2753 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2754 thumb_code = TARGET_ARM == 0;
2755 thumb1_code = TARGET_THUMB1 != 0;
2756 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2757 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2758 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2759 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2760 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2761 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2762 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2763 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2764 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2765 if (arm_restrict_it == 2)
2766 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2768 if (!TARGET_THUMB2)
2769 arm_restrict_it = 0;
2771 /* If we are not using the default (ARM mode) section anchor offset
2772 ranges, then set the correct ranges now. */
2773 if (TARGET_THUMB1)
2775 /* Thumb-1 LDR instructions cannot have negative offsets.
2776 Permissible positive offset ranges are 5-bit (for byte loads),
2777 6-bit (for halfword loads), or 7-bit (for word loads).
2778 Empirical results suggest a 7-bit anchor range gives the best
2779 overall code size. */
2780 targetm.min_anchor_offset = 0;
2781 targetm.max_anchor_offset = 127;
2783 else if (TARGET_THUMB2)
2785 /* The minimum is set such that the total size of the block
2786 for a particular anchor is 248 + 1 + 4095 bytes, which is
2787 divisible by eight, ensuring natural spacing of anchors. */
2788 targetm.min_anchor_offset = -248;
2789 targetm.max_anchor_offset = 4095;
2792 /* V5 code we generate is completely interworking capable, so we turn off
2793 TARGET_INTERWORK here to avoid many tests later on. */
2795 /* XXX However, we must pass the right pre-processor defines to CPP
2796 or GLD can get confused. This is a hack. */
2797 if (TARGET_INTERWORK)
2798 arm_cpp_interwork = 1;
2800 if (arm_arch5)
2801 target_flags &= ~MASK_INTERWORK;
2803 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2804 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2806 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2807 error ("iwmmxt abi requires an iwmmxt capable cpu");
2809 if (!global_options_set.x_arm_fpu_index)
2811 const char *target_fpu_name;
2812 bool ok;
2814 #ifdef FPUTYPE_DEFAULT
2815 target_fpu_name = FPUTYPE_DEFAULT;
2816 #else
2817 target_fpu_name = "vfp";
2818 #endif
2820 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2821 CL_TARGET);
2822 gcc_assert (ok);
2825 arm_fpu_desc = &all_fpus[arm_fpu_index];
2827 if (TARGET_NEON && !arm_arch7)
2828 error ("target CPU does not support NEON");
2830 switch (arm_fpu_desc->model)
2832 case ARM_FP_MODEL_VFP:
2833 arm_fpu_attr = FPU_VFP;
2834 break;
2836 default:
2837 gcc_unreachable();
2840 if (TARGET_AAPCS_BASED)
2842 if (TARGET_CALLER_INTERWORKING)
2843 error ("AAPCS does not support -mcaller-super-interworking");
2844 else
2845 if (TARGET_CALLEE_INTERWORKING)
2846 error ("AAPCS does not support -mcallee-super-interworking");
2849 /* iWMMXt and NEON are incompatible. */
2850 if (TARGET_IWMMXT && TARGET_NEON)
2851 error ("iWMMXt and NEON are incompatible");
2853 /* iWMMXt unsupported under Thumb mode. */
2854 if (TARGET_THUMB && TARGET_IWMMXT)
2855 error ("iWMMXt unsupported under Thumb mode");
2857 /* __fp16 support currently assumes the core has ldrh. */
2858 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2859 sorry ("__fp16 and no ldrh");
2861 /* If soft-float is specified then don't use FPU. */
2862 if (TARGET_SOFT_FLOAT)
2863 arm_fpu_attr = FPU_NONE;
2865 if (TARGET_AAPCS_BASED)
2867 if (arm_abi == ARM_ABI_IWMMXT)
2868 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2869 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2870 && TARGET_HARD_FLOAT
2871 && TARGET_VFP)
2872 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2873 else
2874 arm_pcs_default = ARM_PCS_AAPCS;
2876 else
2878 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2879 sorry ("-mfloat-abi=hard and VFP");
2881 if (arm_abi == ARM_ABI_APCS)
2882 arm_pcs_default = ARM_PCS_APCS;
2883 else
2884 arm_pcs_default = ARM_PCS_ATPCS;
2887 /* For arm2/3 there is no need to do any scheduling if we are doing
2888 software floating-point. */
2889 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2890 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2892 /* Use the cp15 method if it is available. */
2893 if (target_thread_pointer == TP_AUTO)
2895 if (arm_arch6k && !TARGET_THUMB1)
2896 target_thread_pointer = TP_CP15;
2897 else
2898 target_thread_pointer = TP_SOFT;
2901 if (TARGET_HARD_TP && TARGET_THUMB1)
2902 error ("can not use -mtp=cp15 with 16-bit Thumb");
2904 /* Override the default structure alignment for AAPCS ABI. */
2905 if (!global_options_set.x_arm_structure_size_boundary)
2907 if (TARGET_AAPCS_BASED)
2908 arm_structure_size_boundary = 8;
2910 else
2912 if (arm_structure_size_boundary != 8
2913 && arm_structure_size_boundary != 32
2914 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2916 if (ARM_DOUBLEWORD_ALIGN)
2917 warning (0,
2918 "structure size boundary can only be set to 8, 32 or 64");
2919 else
2920 warning (0, "structure size boundary can only be set to 8 or 32");
2921 arm_structure_size_boundary
2922 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2926 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2928 error ("RTP PIC is incompatible with Thumb");
2929 flag_pic = 0;
2932 /* If stack checking is disabled, we can use r10 as the PIC register,
2933 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2934 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2936 if (TARGET_VXWORKS_RTP)
2937 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2938 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2941 if (flag_pic && TARGET_VXWORKS_RTP)
2942 arm_pic_register = 9;
2944 if (arm_pic_register_string != NULL)
2946 int pic_register = decode_reg_name (arm_pic_register_string);
2948 if (!flag_pic)
2949 warning (0, "-mpic-register= is useless without -fpic");
2951 /* Prevent the user from choosing an obviously stupid PIC register. */
2952 else if (pic_register < 0 || call_used_regs[pic_register]
2953 || pic_register == HARD_FRAME_POINTER_REGNUM
2954 || pic_register == STACK_POINTER_REGNUM
2955 || pic_register >= PC_REGNUM
2956 || (TARGET_VXWORKS_RTP
2957 && (unsigned int) pic_register != arm_pic_register))
2958 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2959 else
2960 arm_pic_register = pic_register;
2963 if (TARGET_VXWORKS_RTP
2964 && !global_options_set.x_arm_pic_data_is_text_relative)
2965 arm_pic_data_is_text_relative = 0;
2967 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2968 if (fix_cm3_ldrd == 2)
2970 if (arm_selected_cpu->core == cortexm3)
2971 fix_cm3_ldrd = 1;
2972 else
2973 fix_cm3_ldrd = 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors. */
2985 if (unaligned_access == 2)
2987 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2988 unaligned_access = 1;
2989 else
2990 unaligned_access = 0;
2992 else if (unaligned_access == 1
2993 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 unaligned_access = 0;
2999 if (TARGET_THUMB1 && flag_schedule_insns)
3001 /* Don't warn since it's on by default in -O2. */
3002 flag_schedule_insns = 0;
3005 if (optimize_size)
3007 /* If optimizing for size, bump the number of instructions that we
3008 are prepared to conditionally execute (even on a StrongARM). */
3009 max_insns_skipped = 6;
3011 /* For THUMB2, we limit the conditional sequence to one IT block. */
3012 if (TARGET_THUMB2)
3013 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3015 else
3016 max_insns_skipped = current_tune->max_insns_skipped;
3018 /* Hot/Cold partitioning is not currently supported, since we can't
3019 handle literal pool placement in that case. */
3020 if (flag_reorder_blocks_and_partition)
3022 inform (input_location,
3023 "-freorder-blocks-and-partition not supported on this architecture");
3024 flag_reorder_blocks_and_partition = 0;
3025 flag_reorder_blocks = 1;
3028 if (flag_pic)
3029 /* Hoisting PIC address calculations more aggressively provides a small,
3030 but measurable, size reduction for PIC code. Therefore, we decrease
3031 the bar for unrestricted expression hoisting to the cost of PIC address
3032 calculation, which is 2 instructions. */
3033 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3034 global_options.x_param_values,
3035 global_options_set.x_param_values);
3037 /* ARM EABI defaults to strict volatile bitfields. */
3038 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3039 && abi_version_at_least(2))
3040 flag_strict_volatile_bitfields = 1;
3042 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which we
3043 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3044 if (flag_prefetch_loop_arrays < 0
3045 && HAVE_prefetch
3046 && optimize >= 3
3047 && current_tune->num_prefetch_slots > 0)
3048 flag_prefetch_loop_arrays = 1;
3050 /* Set up parameters to be used in the prefetching algorithm. Do not override the
3051 defaults unless we are tuning for a core we have researched values for. */
3052 if (current_tune->num_prefetch_slots > 0)
3053 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3054 current_tune->num_prefetch_slots,
3055 global_options.x_param_values,
3056 global_options_set.x_param_values);
3057 if (current_tune->l1_cache_line_size >= 0)
3058 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3059 current_tune->l1_cache_line_size,
3060 global_options.x_param_values,
3061 global_options_set.x_param_values);
3062 if (current_tune->l1_cache_size >= 0)
3063 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3064 current_tune->l1_cache_size,
3065 global_options.x_param_values,
3066 global_options_set.x_param_values);
3068 /* Use Neon rather than core registers to perform 64-bit
3069 operations. */
3070 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3071 if (use_neon_for_64bits == 1)
3072 prefer_neon_for_64bits = true;
3074 /* Use the alternative scheduling-pressure algorithm by default. */
3075 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3076 global_options.x_param_values,
3077 global_options_set.x_param_values);
3079 /* Disable shrink-wrap when optimizing function for size, since it tends to
3080 generate additional returns. */
3081 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3082 flag_shrink_wrap = false;
3083 /* TBD: Dwarf info for apcs frame is not handled yet. */
3084 if (TARGET_APCS_FRAME)
3085 flag_shrink_wrap = false;
3087 /* We only support -mslow-flash-data on armv7-m targets. */
3088 if (target_slow_flash_data
3089 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3090 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3091 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3093 /* Currently, for slow flash data, we just disable literal pools. */
3094 if (target_slow_flash_data)
3095 arm_disable_literal_pool = true;
3097 /* Thumb2 inline assembly code should always use unified syntax.
3098 This will apply to ARM and Thumb1 eventually. */
3099 if (TARGET_THUMB2)
3100 inline_asm_unified = 1;
3102 /* Disable scheduling fusion by default if the target is not an ARMv7 processor
3103 or the tuning does not prefer LDRD/STRD. */
3104 if (flag_schedule_fusion == 2
3105 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3106 flag_schedule_fusion = 0;
3108 /* Register global variables with the garbage collector. */
3109 arm_add_gc_roots ();
3112 static void
3113 arm_add_gc_roots (void)
3115 gcc_obstack_init(&minipool_obstack);
3116 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3119 /* A table of known ARM exception types.
3120 For use with the interrupt function attribute. */
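/* Usage example (hypothetical handler):
   void my_irq_handler (void) __attribute__ ((interrupt ("IRQ")));  */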
3122 typedef struct
3124 const char *const arg;
3125 const unsigned long return_value;
3127 isr_attribute_arg;
3129 static const isr_attribute_arg isr_attribute_args [] =
3131 { "IRQ", ARM_FT_ISR },
3132 { "irq", ARM_FT_ISR },
3133 { "FIQ", ARM_FT_FIQ },
3134 { "fiq", ARM_FT_FIQ },
3135 { "ABORT", ARM_FT_ISR },
3136 { "abort", ARM_FT_ISR },
3137 { "ABORT", ARM_FT_ISR },
3138 { "abort", ARM_FT_ISR },
3139 { "UNDEF", ARM_FT_EXCEPTION },
3140 { "undef", ARM_FT_EXCEPTION },
3141 { "SWI", ARM_FT_EXCEPTION },
3142 { "swi", ARM_FT_EXCEPTION },
3143 { NULL, ARM_FT_NORMAL }
3146 /* Returns the (interrupt) function type corresponding to attribute
3147 ARGUMENT, or ARM_FT_UNKNOWN if the type cannot be determined. */
3149 static unsigned long
3150 arm_isr_value (tree argument)
3152 const isr_attribute_arg * ptr;
3153 const char * arg;
3155 if (!arm_arch_notm)
3156 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3158 /* No argument - default to IRQ. */
3159 if (argument == NULL_TREE)
3160 return ARM_FT_ISR;
3162 /* Get the value of the argument. */
3163 if (TREE_VALUE (argument) == NULL_TREE
3164 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3165 return ARM_FT_UNKNOWN;
3167 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3169 /* Check it against the list of known arguments. */
3170 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3171 if (streq (arg, ptr->arg))
3172 return ptr->return_value;
3174 /* An unrecognized interrupt type. */
3175 return ARM_FT_UNKNOWN;
3178 /* Computes the type of the current function. */
3180 static unsigned long
3181 arm_compute_func_type (void)
3183 unsigned long type = ARM_FT_UNKNOWN;
3184 tree a;
3185 tree attr;
3187 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3189 /* Decide if the current function is volatile. Such functions
3190 never return, and many memory cycles can be saved by not storing
3191 register values that will never be needed again. This optimization
3192 was added to speed up context switching in a kernel application. */
3193 if (optimize > 0
3194 && (TREE_NOTHROW (current_function_decl)
3195 || !(flag_unwind_tables
3196 || (flag_exceptions
3197 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3198 && TREE_THIS_VOLATILE (current_function_decl))
3199 type |= ARM_FT_VOLATILE;
3201 if (cfun->static_chain_decl != NULL)
3202 type |= ARM_FT_NESTED;
3204 attr = DECL_ATTRIBUTES (current_function_decl);
3206 a = lookup_attribute ("naked", attr);
3207 if (a != NULL_TREE)
3208 type |= ARM_FT_NAKED;
3210 a = lookup_attribute ("isr", attr);
3211 if (a == NULL_TREE)
3212 a = lookup_attribute ("interrupt", attr);
3214 if (a == NULL_TREE)
3215 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3216 else
3217 type |= arm_isr_value (TREE_VALUE (a));
3219 return type;
3222 /* Returns the type of the current function. */
3224 unsigned long
3225 arm_current_func_type (void)
3227 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3228 cfun->machine->func_type = arm_compute_func_type ();
3230 return cfun->machine->func_type;
3233 bool
3234 arm_allocate_stack_slots_for_args (void)
3236 /* Naked functions should not allocate stack slots for arguments. */
3237 return !IS_NAKED (arm_current_func_type ());
3240 static bool
3241 arm_warn_func_return (tree decl)
3243 /* Naked functions are implemented entirely in assembly, including the
3244 return sequence, so suppress warnings about this. */
3245 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3249 /* Output assembler code for a block containing the constant parts
3250 of a trampoline, leaving space for the variable parts.
3252 On the ARM, (if r8 is the static chain regnum, and remembering that
3253 referencing pc adds an offset of 8) the trampoline looks like:
3254 ldr r8, [pc, #0]
3255 ldr pc, [pc]
3256 .word static chain value
3257 .word function's address
3258 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3260 static void
3261 arm_asm_trampoline_template (FILE *f)
3263 if (TARGET_ARM)
3265 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3266 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3268 else if (TARGET_THUMB2)
3270 /* The Thumb-2 trampoline is similar to the ARM implementation.
3271 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3272 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3273 STATIC_CHAIN_REGNUM, PC_REGNUM);
3274 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3276 else
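/* Thumb-1 cannot load the PC straight from the constant words, so the stub goes
   via r0: push two scratch words, load the static chain and the target address
   PC-relative, store the target over the saved r1 slot, then pop {r0, pc} to
   branch to it.  */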
3278 ASM_OUTPUT_ALIGN (f, 2);
3279 fprintf (f, "\t.code\t16\n");
3280 fprintf (f, ".Ltrampoline_start:\n");
3281 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3282 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3283 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3284 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3285 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3286 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3288 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3289 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3292 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3294 static void
3295 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3297 rtx fnaddr, mem, a_tramp;
3299 emit_block_move (m_tramp, assemble_trampoline_template (),
3300 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
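/* Fill in the two .word slots of the template with the static chain value and the
   target function's address (offsets 8 and 12 for the 32-bit templates, 12 and 16
   for Thumb-1).  */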
3302 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3303 emit_move_insn (mem, chain_value);
3305 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3306 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3307 emit_move_insn (mem, fnaddr);
3309 a_tramp = XEXP (m_tramp, 0);
3310 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3311 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3312 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3315 /* Thumb trampolines should be entered in thumb mode, so set
3316 the bottom bit of the address. */
3318 static rtx
3319 arm_trampoline_adjust_address (rtx addr)
3321 if (TARGET_THUMB)
3322 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3323 NULL, 0, OPTAB_LIB_WIDEN);
3324 return addr;
3327 /* Return 1 if it is possible to return using a single instruction.
3328 If SIBLING is non-null, this is a test for a return before a sibling
3329 call. SIBLING is the call insn, so we can examine its register usage. */
3332 use_return_insn (int iscond, rtx sibling)
3334 int regno;
3335 unsigned int func_type;
3336 unsigned long saved_int_regs;
3337 unsigned HOST_WIDE_INT stack_adjust;
3338 arm_stack_offsets *offsets;
3340 /* Never use a return instruction before reload has run. */
3341 if (!reload_completed)
3342 return 0;
3344 func_type = arm_current_func_type ();
3346 /* Naked, volatile and stack alignment functions need special
3347 consideration. */
3348 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3349 return 0;
3351 /* So do interrupt functions that use the frame pointer and Thumb
3352 interrupt functions. */
3353 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3354 return 0;
3356 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3357 && !optimize_function_for_size_p (cfun))
3358 return 0;
3360 offsets = arm_get_frame_offsets ();
3361 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3363 /* As do variadic functions. */
3364 if (crtl->args.pretend_args_size
3365 || cfun->machine->uses_anonymous_args
3366 /* Or if the function calls __builtin_eh_return () */
3367 || crtl->calls_eh_return
3368 /* Or if the function calls alloca */
3369 || cfun->calls_alloca
3370 /* Or if there is a stack adjustment. However, if the stack pointer
3371 is saved on the stack, we can use a pre-incrementing stack load. */
3372 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3373 && stack_adjust == 4)))
3374 return 0;
3376 saved_int_regs = offsets->saved_regs_mask;
3378 /* Unfortunately, the insn
3380 ldmib sp, {..., sp, ...}
3382 triggers a bug on most SA-110 based devices, such that the stack
3383 pointer won't be correctly restored if the instruction takes a
3384 page fault. We work around this problem by popping r3 along with
3385 the other registers, since that is never slower than executing
3386 another instruction.
3388 We test for !arm_arch5 here, because code for any architecture
3389 less than this could potentially be run on one of the buggy
3390 chips. */
3391 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3393 /* Validate that r3 is a call-clobbered register (always true in
3394 the default abi) ... */
3395 if (!call_used_regs[3])
3396 return 0;
3398 /* ... that it isn't being used for a return value ... */
3399 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3400 return 0;
3402 /* ... or for a tail-call argument ... */
3403 if (sibling)
3405 gcc_assert (CALL_P (sibling));
3407 if (find_regno_fusage (sibling, USE, 3))
3408 return 0;
3411 /* ... and that there are no call-saved registers in r0-r2
3412 (always true in the default ABI). */
3413 if (saved_int_regs & 0x7)
3414 return 0;
3417 /* Can't be done if interworking with Thumb, and any registers have been
3418 stacked. */
3419 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3420 return 0;
3422 /* On StrongARM, conditional returns are expensive if they aren't
3423 taken and multiple registers have been stacked. */
3424 if (iscond && arm_tune_strongarm)
3426 /* Conditional return when just the LR is stored is a simple
3427 conditional-load instruction; that's not expensive. */
3428 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3429 return 0;
3431 if (flag_pic
3432 && arm_pic_register != INVALID_REGNUM
3433 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3434 return 0;
3437 /* If there are saved registers but the LR isn't saved, then we need
3438 two instructions for the return. */
3439 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3440 return 0;
3442 /* Can't be done if any of the VFP regs are pushed,
3443 since this also requires an insn. */
3444 if (TARGET_HARD_FLOAT && TARGET_VFP)
3445 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3446 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3447 return 0;
3449 if (TARGET_REALLY_IWMMXT)
3450 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3451 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3452 return 0;
3454 return 1;
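/* Illustrative outcomes of the checks above (a sketch, not a complete
   specification): a plain ARM-state function whose only saved registers
   are {r4, lr} and whose frame needs no extra stack adjustment can
   return with a single "ldmfd sp!, {r4, pc}", so this returns 1; the
   same function compiled with interworking enabled, or one that calls
   alloca or __builtin_eh_return, fails one of the early tests above and
   returns 0.  */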
3457 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3458 shrink-wrapping if possible. This is the case if we need to emit a
3459 prologue, which we can test by looking at the offsets. */
3460 bool
3461 use_simple_return_p (void)
3463 arm_stack_offsets *offsets;
3465 offsets = arm_get_frame_offsets ();
3466 return offsets->outgoing_args != 0;
3469 /* Return TRUE if int I is a valid immediate ARM constant. */
3472 const_ok_for_arm (HOST_WIDE_INT i)
3474 int lowbit;
3476 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3477 be all zero, or all one. */
3478 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3479 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3480 != ((~(unsigned HOST_WIDE_INT) 0)
3481 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3482 return FALSE;
3484 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3486 /* Fast return for 0 and small values. We must do this for zero, since
3487 the code below can't handle that one case. */
3488 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3489 return TRUE;
3491 /* Get the number of trailing zeros. */
3492 lowbit = ffs((int) i) - 1;
3494 /* Only even shifts are allowed in ARM mode so round down to the
3495 nearest even number. */
3496 if (TARGET_ARM)
3497 lowbit &= ~1;
3499 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3500 return TRUE;
3502 if (TARGET_ARM)
3504 /* Allow rotated constants in ARM mode. */
3505 if (lowbit <= 4
3506 && ((i & ~0xc000003f) == 0
3507 || (i & ~0xf000000f) == 0
3508 || (i & ~0xfc000003) == 0))
3509 return TRUE;
3511 else
3513 HOST_WIDE_INT v;
3515 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3516 v = i & 0xff;
3517 v |= v << 16;
3518 if (i == v || i == (v | (v << 8)))
3519 return TRUE;
3521 /* Allow repeated pattern 0xXY00XY00. */
3522 v = i & 0xff00;
3523 v |= v << 16;
3524 if (i == v)
3525 return TRUE;
3528 return FALSE;
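/* Worked examples of the tests above (illustrative, not exhaustive):
   0x000000ff and 0x00ff0000 fit the 8-bit-rotated-by-an-even-amount
   form and are accepted in both ARM and Thumb-2 state; 0xf000000f needs
   a rotation that wraps around bit 0, which only the ARM-mode test
   accepts; 0x00ff00ff and 0xffffffff match the Thumb-2 replicated-byte
   patterns but fail the ARM-mode tests; a value such as 0x12345678
   fails everything and has to be synthesized from several instructions
   (see arm_gen_constant below).  */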
3531 /* Return true if I is a valid constant for the operation CODE. */
3533 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3535 if (const_ok_for_arm (i))
3536 return 1;
3538 switch (code)
3540 case SET:
3541 /* See if we can use movw. */
3542 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3543 return 1;
3544 else
3545 /* Otherwise, try mvn. */
3546 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3548 case PLUS:
3549 /* See if we can use addw or subw. */
3550 if (TARGET_THUMB2
3551 && ((i & 0xfffff000) == 0
3552 || ((-i) & 0xfffff000) == 0))
3553 return 1;
3554 /* else fall through. */
3556 case COMPARE:
3557 case EQ:
3558 case NE:
3559 case GT:
3560 case LE:
3561 case LT:
3562 case GE:
3563 case GEU:
3564 case LTU:
3565 case GTU:
3566 case LEU:
3567 case UNORDERED:
3568 case ORDERED:
3569 case UNEQ:
3570 case UNGE:
3571 case UNLT:
3572 case UNGT:
3573 case UNLE:
3574 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3576 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3577 case XOR:
3578 return 0;
3580 case IOR:
3581 if (TARGET_THUMB2)
3582 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3583 return 0;
3585 case AND:
3586 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3588 default:
3589 gcc_unreachable ();
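/* A few illustrative cases for the switch above (a sketch): a SET of
   0xffffff00 is accepted because its complement 0xff fits an MVN; a
   PLUS of -256 is accepted because the negated value 256 can be used by
   a SUB (or by SUBW on Thumb-2); an AND with 0xffffff00 is accepted
   because the inverted mask 0xff fits a BIC.  */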
3593 /* Return true if I is a valid DImode constant for the operation CODE. */
3595 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3597 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3598 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3599 rtx hi = GEN_INT (hi_val);
3600 rtx lo = GEN_INT (lo_val);
3602 if (TARGET_THUMB1)
3603 return 0;
3605 switch (code)
3607 case AND:
3608 case IOR:
3609 case XOR:
3610 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3611 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3612 case PLUS:
3613 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3615 default:
3616 return 0;
3620 /* Emit a sequence of insns to handle a large constant.
3621 CODE is the code of the operation required, it can be any of SET, PLUS,
3622 IOR, AND, XOR, MINUS;
3623 MODE is the mode in which the operation is being performed;
3624 VAL is the integer to operate on;
3625 SOURCE is the other operand (a register, or a null-pointer for SET);
3626 SUBTARGETS means it is safe to create scratch registers if that will
3627 either produce a simpler sequence, or we will want to cse the values.
3628 Return value is the number of insns emitted. */
3630 /* ??? Tweak this for thumb2. */
3632 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3633 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3635 rtx cond;
3637 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3638 cond = COND_EXEC_TEST (PATTERN (insn));
3639 else
3640 cond = NULL_RTX;
3642 if (subtargets || code == SET
3643 || (REG_P (target) && REG_P (source)
3644 && REGNO (target) != REGNO (source)))
3646 /* After arm_reorg has been called, we can't fix up expensive
3647 constants by pushing them into memory so we must synthesize
3648 them in-line, regardless of the cost. This is only likely to
3649 be more costly on chips that have load delay slots and we are
3650 compiling without running the scheduler (so no splitting
3651 occurred before the final instruction emission).
3653 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3655 if (!cfun->machine->after_arm_reorg
3656 && !cond
3657 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3658 1, 0)
3659 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3660 + (code != SET))))
3662 if (code == SET)
3664 /* Currently SET is the only monadic value for CODE; all
3665 the rest are dyadic. */
3666 if (TARGET_USE_MOVT)
3667 arm_emit_movpair (target, GEN_INT (val));
3668 else
3669 emit_set_insn (target, GEN_INT (val));
3671 return 1;
3673 else
3675 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3677 if (TARGET_USE_MOVT)
3678 arm_emit_movpair (temp, GEN_INT (val));
3679 else
3680 emit_set_insn (temp, GEN_INT (val));
3682 /* For MINUS, the constant is the value subtracted from (we emit
3683 temp - source), since we never have subtraction of a constant. */
3684 if (code == MINUS)
3685 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3686 else
3687 emit_set_insn (target,
3688 gen_rtx_fmt_ee (code, mode, source, temp));
3689 return 2;
3694 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3695 1);
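/* As a concrete illustration of the MOVT path above (a sketch, assuming
   a core with TARGET_USE_MOVT): a SET of 0x12345678 that is too
   expensive to synthesize from shifted 8-bit immediates is emitted by
   arm_emit_movpair as

       movw    rD, #0x5678     @ zero-extended low halfword
       movt    rD, #0x1234     @ high halfword

   covering the full 32-bit value in two instructions.  */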
3698 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3699 ARM/THUMB2 immediates and add up to VAL.
3700 The function return value gives the number of insns required. */
3701 static int
3702 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3703 struct four_ints *return_sequence)
3705 int best_consecutive_zeros = 0;
3706 int i;
3707 int best_start = 0;
3708 int insns1, insns2;
3709 struct four_ints tmp_sequence;
3711 /* If we aren't targeting ARM, the best place to start is always at
3712 the bottom, otherwise look more closely. */
3713 if (TARGET_ARM)
3715 for (i = 0; i < 32; i += 2)
3717 int consecutive_zeros = 0;
3719 if (!(val & (3 << i)))
3721 while ((i < 32) && !(val & (3 << i)))
3723 consecutive_zeros += 2;
3724 i += 2;
3726 if (consecutive_zeros > best_consecutive_zeros)
3728 best_consecutive_zeros = consecutive_zeros;
3729 best_start = i - consecutive_zeros;
3731 i -= 2;
3736 /* So long as it won't require any more insns to do so, it's
3737 desirable to emit a small constant (in bits 0...9) in the last
3738 insn. This way there is more chance that it can be combined with
3739 a later addressing insn to form a pre-indexed load or store
3740 operation. Consider:
3742 *((volatile int *)0xe0000100) = 1;
3743 *((volatile int *)0xe0000110) = 2;
3745 We want this to wind up as:
3747 mov rA, #0xe0000000
3748 mov rB, #1
3749 str rB, [rA, #0x100]
3750 mov rB, #2
3751 str rB, [rA, #0x110]
3753 rather than having to synthesize both large constants from scratch.
3755 Therefore, we calculate how many insns would be required to emit
3756 the constant starting from `best_start', and also starting from
3757 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3758 yield a shorter sequence, we may as well use zero. */
3759 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3760 if (best_start != 0
3761 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3763 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3764 if (insns2 <= insns1)
3766 *return_sequence = tmp_sequence;
3767 insns1 = insns2;
3771 return insns1;
3774 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3775 static int
3776 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3777 struct four_ints *return_sequence, int i)
3779 int remainder = val & 0xffffffff;
3780 int insns = 0;
3782 /* Try and find a way of doing the job in either two or three
3783 instructions.
3785 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3786 location. We start at position I. This may be the MSB, or
3787 optimal_immediate_sequence may have positioned it at the largest block
3788 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3789 wrapping around to the top of the word when we drop off the bottom.
3790 In the worst case this code should produce no more than four insns.
3792 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3793 constants, shifted to any arbitrary location. We should always start
3794 at the MSB. */
3797 int end;
3798 unsigned int b1, b2, b3, b4;
3799 unsigned HOST_WIDE_INT result;
3800 int loc;
3802 gcc_assert (insns < 4);
3804 if (i <= 0)
3805 i += 32;
3807 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3808 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3810 loc = i;
3811 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3812 /* We can use addw/subw for the last 12 bits. */
3813 result = remainder;
3814 else
3816 /* Use an 8-bit shifted/rotated immediate. */
3817 end = i - 8;
3818 if (end < 0)
3819 end += 32;
3820 result = remainder & ((0x0ff << end)
3821 | ((i < end) ? (0xff >> (32 - end))
3822 : 0));
3823 i -= 8;
3826 else
3828 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3829 arbitrary shifts. */
3830 i -= TARGET_ARM ? 2 : 1;
3831 continue;
3834 /* Next, see if we can do a better job with a thumb2 replicated
3835 constant.
3837 We do it this way around to catch the cases like 0x01F001E0 where
3838 two 8-bit immediates would work, but a replicated constant would
3839 make it worse.
3841 TODO: 16-bit constants that don't clear all the bits, but still win.
3842 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3843 if (TARGET_THUMB2)
3845 b1 = (remainder & 0xff000000) >> 24;
3846 b2 = (remainder & 0x00ff0000) >> 16;
3847 b3 = (remainder & 0x0000ff00) >> 8;
3848 b4 = remainder & 0xff;
3850 if (loc > 24)
3852 /* The 8-bit immediate already found clears b1 (and maybe b2),
3853 but must leave b3 and b4 alone. */
3855 /* First try to find a 32-bit replicated constant that clears
3856 almost everything. We can assume that we can't do it in one,
3857 or else we wouldn't be here. */
3858 unsigned int tmp = b1 & b2 & b3 & b4;
3859 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3860 + (tmp << 24);
3861 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3862 + (tmp == b3) + (tmp == b4);
3863 if (tmp
3864 && (matching_bytes >= 3
3865 || (matching_bytes == 2
3866 && const_ok_for_op (remainder & ~tmp2, code))))
3868 /* At least 3 of the bytes match, and the fourth has at
3869 least as many bits set, or two of the bytes match
3870 and it will only require one more insn to finish. */
3871 result = tmp2;
3872 i = tmp != b1 ? 32
3873 : tmp != b2 ? 24
3874 : tmp != b3 ? 16
3875 : 8;
3878 /* Second, try to find a 16-bit replicated constant that can
3879 leave three of the bytes clear. If b2 or b4 is already
3880 zero, then we can. If the 8-bit from above would not
3881 clear b2 anyway, then we still win. */
3882 else if (b1 == b3 && (!b2 || !b4
3883 || (remainder & 0x00ff0000 & ~result)))
3885 result = remainder & 0xff00ff00;
3886 i = 24;
3889 else if (loc > 16)
3891 /* The 8-bit immediate already found clears b2 (and maybe b3)
3892 and we don't get here unless b1 is already clear, but it will
3893 leave b4 unchanged. */
3895 /* If we can clear b2 and b4 at once, then we win, since the
3896 8-bits couldn't possibly reach that far. */
3897 if (b2 == b4)
3899 result = remainder & 0x00ff00ff;
3900 i = 16;
3905 return_sequence->i[insns++] = result;
3906 remainder &= ~result;
3908 if (code == SET || code == MINUS)
3909 code = PLUS;
3911 while (remainder);
3913 return insns;
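/* An illustrative decomposition (the exact chunks chosen can vary with
   the starting position I): in ARM state the value 0x12345678 splits
   into the four rotated 8-bit immediates 0x12000000, 0x00340000,
   0x00005600 and 0x00000078, so building it from scratch costs four
   instructions; the Thumb-2 replicated-constant checks above can often
   do better, e.g. 0x01ff01ff can be covered by 0x01010101 plus
   0x00fe00fe in two steps.  */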
3916 /* Emit an instruction with the indicated PATTERN. If COND is
3917 non-NULL, conditionalize the execution of the instruction on COND
3918 being true. */
3920 static void
3921 emit_constant_insn (rtx cond, rtx pattern)
3923 if (cond)
3924 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3925 emit_insn (pattern);
3928 /* As above, but extra parameter GENERATE which, if clear, suppresses
3929 RTL generation. */
3931 static int
3932 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3933 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3934 int generate)
3936 int can_invert = 0;
3937 int can_negate = 0;
3938 int final_invert = 0;
3939 int i;
3940 int set_sign_bit_copies = 0;
3941 int clear_sign_bit_copies = 0;
3942 int clear_zero_bit_copies = 0;
3943 int set_zero_bit_copies = 0;
3944 int insns = 0, neg_insns, inv_insns;
3945 unsigned HOST_WIDE_INT temp1, temp2;
3946 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3947 struct four_ints *immediates;
3948 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3950 /* Find out which operations are safe for a given CODE. Also do a quick
3951 check for degenerate cases; these can occur when DImode operations
3952 are split. */
3953 switch (code)
3955 case SET:
3956 can_invert = 1;
3957 break;
3959 case PLUS:
3960 can_negate = 1;
3961 break;
3963 case IOR:
3964 if (remainder == 0xffffffff)
3966 if (generate)
3967 emit_constant_insn (cond,
3968 gen_rtx_SET (VOIDmode, target,
3969 GEN_INT (ARM_SIGN_EXTEND (val))));
3970 return 1;
3973 if (remainder == 0)
3975 if (reload_completed && rtx_equal_p (target, source))
3976 return 0;
3978 if (generate)
3979 emit_constant_insn (cond,
3980 gen_rtx_SET (VOIDmode, target, source));
3981 return 1;
3983 break;
3985 case AND:
3986 if (remainder == 0)
3988 if (generate)
3989 emit_constant_insn (cond,
3990 gen_rtx_SET (VOIDmode, target, const0_rtx));
3991 return 1;
3993 if (remainder == 0xffffffff)
3995 if (reload_completed && rtx_equal_p (target, source))
3996 return 0;
3997 if (generate)
3998 emit_constant_insn (cond,
3999 gen_rtx_SET (VOIDmode, target, source));
4000 return 1;
4002 can_invert = 1;
4003 break;
4005 case XOR:
4006 if (remainder == 0)
4008 if (reload_completed && rtx_equal_p (target, source))
4009 return 0;
4010 if (generate)
4011 emit_constant_insn (cond,
4012 gen_rtx_SET (VOIDmode, target, source));
4013 return 1;
4016 if (remainder == 0xffffffff)
4018 if (generate)
4019 emit_constant_insn (cond,
4020 gen_rtx_SET (VOIDmode, target,
4021 gen_rtx_NOT (mode, source)));
4022 return 1;
4024 final_invert = 1;
4025 break;
4027 case MINUS:
4028 /* We treat MINUS as (val - source), since (source - val) is always
4029 passed as (source + (-val)). */
4030 if (remainder == 0)
4032 if (generate)
4033 emit_constant_insn (cond,
4034 gen_rtx_SET (VOIDmode, target,
4035 gen_rtx_NEG (mode, source)));
4036 return 1;
4038 if (const_ok_for_arm (val))
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 gen_rtx_MINUS (mode, GEN_INT (val),
4044 source)));
4045 return 1;
4048 break;
4050 default:
4051 gcc_unreachable ();
4054 /* If we can do it in one insn get out quickly. */
4055 if (const_ok_for_op (val, code))
4057 if (generate)
4058 emit_constant_insn (cond,
4059 gen_rtx_SET (VOIDmode, target,
4060 (source
4061 ? gen_rtx_fmt_ee (code, mode, source,
4062 GEN_INT (val))
4063 : GEN_INT (val))));
4064 return 1;
4067 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4068 insn. */
4069 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4070 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4072 if (generate)
4074 if (mode == SImode && i == 16)
4075 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4076 smaller insn. */
4077 emit_constant_insn (cond,
4078 gen_zero_extendhisi2
4079 (target, gen_lowpart (HImode, source)));
4080 else
4081 /* Extz only supports SImode, but we can coerce the operands
4082 into that mode. */
4083 emit_constant_insn (cond,
4084 gen_extzv_t2 (gen_lowpart (SImode, target),
4085 gen_lowpart (SImode, source),
4086 GEN_INT (i), const0_rtx));
4089 return 1;
4092 /* Calculate a few attributes that may be useful for specific
4093 optimizations. */
4094 /* Count number of leading zeros. */
4095 for (i = 31; i >= 0; i--)
4097 if ((remainder & (1 << i)) == 0)
4098 clear_sign_bit_copies++;
4099 else
4100 break;
4103 /* Count number of leading 1's. */
4104 for (i = 31; i >= 0; i--)
4106 if ((remainder & (1 << i)) != 0)
4107 set_sign_bit_copies++;
4108 else
4109 break;
4112 /* Count number of trailing zeros. */
4113 for (i = 0; i <= 31; i++)
4115 if ((remainder & (1 << i)) == 0)
4116 clear_zero_bit_copies++;
4117 else
4118 break;
4121 /* Count number of trailing 1's. */
4122 for (i = 0; i <= 31; i++)
4124 if ((remainder & (1 << i)) != 0)
4125 set_zero_bit_copies++;
4126 else
4127 break;
4130 switch (code)
4132 case SET:
4133 /* See if we can do this by sign_extending a constant that is known
4134 to be negative. This is a good way of doing it, since the shift
4135 may well merge into a subsequent insn. */
4136 if (set_sign_bit_copies > 1)
4138 if (const_ok_for_arm
4139 (temp1 = ARM_SIGN_EXTEND (remainder
4140 << (set_sign_bit_copies - 1))))
4142 if (generate)
4144 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4145 emit_constant_insn (cond,
4146 gen_rtx_SET (VOIDmode, new_src,
4147 GEN_INT (temp1)));
4148 emit_constant_insn (cond,
4149 gen_ashrsi3 (target, new_src,
4150 GEN_INT (set_sign_bit_copies - 1)));
4152 return 2;
4154 /* For an inverted constant, we will need to set the low bits,
4155 these will be shifted out of harm's way. */
4156 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4157 if (const_ok_for_arm (~temp1))
4159 if (generate)
4161 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4162 emit_constant_insn (cond,
4163 gen_rtx_SET (VOIDmode, new_src,
4164 GEN_INT (temp1)));
4165 emit_constant_insn (cond,
4166 gen_ashrsi3 (target, new_src,
4167 GEN_INT (set_sign_bit_copies - 1)));
4169 return 2;
4173 /* See if we can calculate the value as the difference between two
4174 valid immediates. */
4175 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4177 int topshift = clear_sign_bit_copies & ~1;
4179 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4180 & (0xff000000 >> topshift));
4182 /* If temp1 is zero, then that means the 9 most significant
4183 bits of remainder were 1 and we've caused it to overflow.
4184 When topshift is 0 we don't need to do anything since we
4185 can borrow from 'bit 32'. */
4186 if (temp1 == 0 && topshift != 0)
4187 temp1 = 0x80000000 >> (topshift - 1);
4189 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4191 if (const_ok_for_arm (temp2))
4193 if (generate)
4195 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4196 emit_constant_insn (cond,
4197 gen_rtx_SET (VOIDmode, new_src,
4198 GEN_INT (temp1)));
4199 emit_constant_insn (cond,
4200 gen_addsi3 (target, new_src,
4201 GEN_INT (-temp2)));
4204 return 2;
4208 /* See if we can generate this by setting the bottom (or the top)
4209 16 bits, and then shifting these into the other half of the
4210 word. We only look for the simplest cases, to do more would cost
4211 too much. Be careful, however, not to generate this when the
4212 alternative would take fewer insns. */
4213 if (val & 0xffff0000)
4215 temp1 = remainder & 0xffff0000;
4216 temp2 = remainder & 0x0000ffff;
4218 /* Overlaps outside this range are best done using other methods. */
4219 for (i = 9; i < 24; i++)
4221 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4222 && !const_ok_for_arm (temp2))
4224 rtx new_src = (subtargets
4225 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4226 : target);
4227 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4228 source, subtargets, generate);
4229 source = new_src;
4230 if (generate)
4231 emit_constant_insn
4232 (cond,
4233 gen_rtx_SET
4234 (VOIDmode, target,
4235 gen_rtx_IOR (mode,
4236 gen_rtx_ASHIFT (mode, source,
4237 GEN_INT (i)),
4238 source)));
4239 return insns + 1;
4243 /* Don't duplicate cases already considered. */
4244 for (i = 17; i < 24; i++)
4246 if (((temp1 | (temp1 >> i)) == remainder)
4247 && !const_ok_for_arm (temp1))
4249 rtx new_src = (subtargets
4250 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4251 : target);
4252 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4253 source, subtargets, generate);
4254 source = new_src;
4255 if (generate)
4256 emit_constant_insn
4257 (cond,
4258 gen_rtx_SET (VOIDmode, target,
4259 gen_rtx_IOR
4260 (mode,
4261 gen_rtx_LSHIFTRT (mode, source,
4262 GEN_INT (i)),
4263 source)));
4264 return insns + 1;
4268 break;
4270 case IOR:
4271 case XOR:
4272 /* If we have IOR or XOR, and the constant can be loaded in a
4273 single instruction, and we can find a temporary to put it in,
4274 then this can be done in two instructions instead of 3-4. */
4275 if (subtargets
4276 /* TARGET can't be NULL if SUBTARGETS is 0 */
4277 || (reload_completed && !reg_mentioned_p (target, source)))
4279 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4281 if (generate)
4283 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4285 emit_constant_insn (cond,
4286 gen_rtx_SET (VOIDmode, sub,
4287 GEN_INT (val)));
4288 emit_constant_insn (cond,
4289 gen_rtx_SET (VOIDmode, target,
4290 gen_rtx_fmt_ee (code, mode,
4291 source, sub)));
4293 return 2;
4297 if (code == XOR)
4298 break;
4300 /* Convert.
4301 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4302 and the remaining bits 0, e.g. 0xfff00000)
4303 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4305 This can be done in 2 instructions by using shifts with mov or mvn.
4306 e.g. for
4307 x = x | 0xfff00000;
4308 we generate.
4309 mvn r0, r0, asl #12
4310 mvn r0, r0, lsr #12 */
4311 if (set_sign_bit_copies > 8
4312 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4314 if (generate)
4316 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4317 rtx shift = GEN_INT (set_sign_bit_copies);
4319 emit_constant_insn
4320 (cond,
4321 gen_rtx_SET (VOIDmode, sub,
4322 gen_rtx_NOT (mode,
4323 gen_rtx_ASHIFT (mode,
4324 source,
4325 shift))));
4326 emit_constant_insn
4327 (cond,
4328 gen_rtx_SET (VOIDmode, target,
4329 gen_rtx_NOT (mode,
4330 gen_rtx_LSHIFTRT (mode, sub,
4331 shift))));
4333 return 2;
4336 /* Convert
4337 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4339 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4341 E.g. for r0 = r0 | 0xfff
4342 mvn r0, r0, lsr #12
4343 mvn r0, r0, asl #12
4346 if (set_zero_bit_copies > 8
4347 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4349 if (generate)
4351 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4352 rtx shift = GEN_INT (set_zero_bit_copies);
4354 emit_constant_insn
4355 (cond,
4356 gen_rtx_SET (VOIDmode, sub,
4357 gen_rtx_NOT (mode,
4358 gen_rtx_LSHIFTRT (mode,
4359 source,
4360 shift))));
4361 emit_constant_insn
4362 (cond,
4363 gen_rtx_SET (VOIDmode, target,
4364 gen_rtx_NOT (mode,
4365 gen_rtx_ASHIFT (mode, sub,
4366 shift))));
4368 return 2;
4371 /* This will never be reached for Thumb2 because orn is a valid
4372 instruction. This is for Thumb1 and the ARM 32 bit cases.
4374 x = y | constant (such that ~constant is a valid constant)
4375 Transform this to
4376 x = ~(~y & ~constant).
4378 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4380 if (generate)
4382 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4383 emit_constant_insn (cond,
4384 gen_rtx_SET (VOIDmode, sub,
4385 gen_rtx_NOT (mode, source)));
4386 source = sub;
4387 if (subtargets)
4388 sub = gen_reg_rtx (mode);
4389 emit_constant_insn (cond,
4390 gen_rtx_SET (VOIDmode, sub,
4391 gen_rtx_AND (mode, source,
4392 GEN_INT (temp1))));
4393 emit_constant_insn (cond,
4394 gen_rtx_SET (VOIDmode, target,
4395 gen_rtx_NOT (mode, sub)));
4397 return 3;
4399 break;
4401 case AND:
4402 /* See if two shifts will do 2 or more insn's worth of work. */
4403 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4405 HOST_WIDE_INT shift_mask = ((0xffffffff
4406 << (32 - clear_sign_bit_copies))
4407 & 0xffffffff);
4409 if ((remainder | shift_mask) != 0xffffffff)
4411 if (generate)
4413 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4414 insns = arm_gen_constant (AND, mode, cond,
4415 remainder | shift_mask,
4416 new_src, source, subtargets, 1);
4417 source = new_src;
4419 else
4421 rtx targ = subtargets ? NULL_RTX : target;
4422 insns = arm_gen_constant (AND, mode, cond,
4423 remainder | shift_mask,
4424 targ, source, subtargets, 0);
4428 if (generate)
4430 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4431 rtx shift = GEN_INT (clear_sign_bit_copies);
4433 emit_insn (gen_ashlsi3 (new_src, source, shift));
4434 emit_insn (gen_lshrsi3 (target, new_src, shift));
4437 return insns + 2;
4440 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4442 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4444 if ((remainder | shift_mask) != 0xffffffff)
4446 if (generate)
4448 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4450 insns = arm_gen_constant (AND, mode, cond,
4451 remainder | shift_mask,
4452 new_src, source, subtargets, 1);
4453 source = new_src;
4455 else
4457 rtx targ = subtargets ? NULL_RTX : target;
4459 insns = arm_gen_constant (AND, mode, cond,
4460 remainder | shift_mask,
4461 targ, source, subtargets, 0);
4465 if (generate)
4467 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4468 rtx shift = GEN_INT (clear_zero_bit_copies);
4470 emit_insn (gen_lshrsi3 (new_src, source, shift));
4471 emit_insn (gen_ashlsi3 (target, new_src, shift));
4474 return insns + 2;
4477 break;
4479 default:
4480 break;
4483 /* Calculate what the instruction sequences would be if we generated it
4484 normally, negated, or inverted. */
4485 if (code == AND)
4486 /* AND cannot be split into multiple insns, so invert and use BIC. */
4487 insns = 99;
4488 else
4489 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4491 if (can_negate)
4492 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4493 &neg_immediates);
4494 else
4495 neg_insns = 99;
4497 if (can_invert || final_invert)
4498 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4499 &inv_immediates);
4500 else
4501 inv_insns = 99;
4503 immediates = &pos_immediates;
4505 /* Is the negated immediate sequence more efficient? */
4506 if (neg_insns < insns && neg_insns <= inv_insns)
4508 insns = neg_insns;
4509 immediates = &neg_immediates;
4511 else
4512 can_negate = 0;
4514 /* Is the inverted immediate sequence more efficient?
4515 We must allow for an extra NOT instruction for XOR operations, although
4516 there is some chance that the final 'mvn' will get optimized later. */
4517 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4519 insns = inv_insns;
4520 immediates = &inv_immediates;
4522 else
4524 can_invert = 0;
4525 final_invert = 0;
4528 /* Now output the chosen sequence as instructions. */
4529 if (generate)
4531 for (i = 0; i < insns; i++)
4533 rtx new_src, temp1_rtx;
4535 temp1 = immediates->i[i];
4537 if (code == SET || code == MINUS)
4538 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4539 else if ((final_invert || i < (insns - 1)) && subtargets)
4540 new_src = gen_reg_rtx (mode);
4541 else
4542 new_src = target;
4544 if (can_invert)
4545 temp1 = ~temp1;
4546 else if (can_negate)
4547 temp1 = -temp1;
4549 temp1 = trunc_int_for_mode (temp1, mode);
4550 temp1_rtx = GEN_INT (temp1);
4552 if (code == SET)
4554 else if (code == MINUS)
4555 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4556 else
4557 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4559 emit_constant_insn (cond,
4560 gen_rtx_SET (VOIDmode, new_src,
4561 temp1_rtx));
4562 source = new_src;
4564 if (code == SET)
4566 can_negate = can_invert;
4567 can_invert = 0;
4568 code = PLUS;
4570 else if (code == MINUS)
4571 code = PLUS;
4575 if (final_invert)
4577 if (generate)
4578 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4579 gen_rtx_NOT (mode, source)));
4580 insns++;
4583 return insns;
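/* One illustrative outcome of the sequence selection above (a sketch,
   assuming an ARM-state core without MOVW/MOVT): a SET of 0xffff1234
   needs four instructions built directly, but its inverse 0x0000edcb
   needs only two, so the inverted sequence wins and something like

       mvn     rD, #0xed00
       sub     rD, rD, #0xcb

   is emitted (0xffff12ff - 0xcb == 0xffff1234).  */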
4586 /* Canonicalize a comparison so that we are more likely to recognize it.
4587 This can be done for a few constant compares, where we can make the
4588 immediate value easier to load. */
4590 static void
4591 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4592 bool op0_preserve_value)
4594 machine_mode mode;
4595 unsigned HOST_WIDE_INT i, maxval;
4597 mode = GET_MODE (*op0);
4598 if (mode == VOIDmode)
4599 mode = GET_MODE (*op1);
4601 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4603 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4604 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4605 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4606 for GTU/LEU in Thumb mode. */
4607 if (mode == DImode)
4609 rtx tem;
4611 if (*code == GT || *code == LE
4612 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4614 /* Missing comparison. First try to use an available
4615 comparison. */
4616 if (CONST_INT_P (*op1))
4618 i = INTVAL (*op1);
4619 switch (*code)
4621 case GT:
4622 case LE:
4623 if (i != maxval
4624 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4626 *op1 = GEN_INT (i + 1);
4627 *code = *code == GT ? GE : LT;
4628 return;
4630 break;
4631 case GTU:
4632 case LEU:
4633 if (i != ~((unsigned HOST_WIDE_INT) 0)
4634 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4636 *op1 = GEN_INT (i + 1);
4637 *code = *code == GTU ? GEU : LTU;
4638 return;
4640 break;
4641 default:
4642 gcc_unreachable ();
4646 /* If that did not work, reverse the condition. */
4647 if (!op0_preserve_value)
4649 tem = *op0;
4650 *op0 = *op1;
4651 *op1 = tem;
4652 *code = (int)swap_condition ((enum rtx_code)*code);
4655 return;
4658 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4659 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4660 to facilitate possible combining with a cmp into 'ands'. */
4661 if (mode == SImode
4662 && GET_CODE (*op0) == ZERO_EXTEND
4663 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4664 && GET_MODE (XEXP (*op0, 0)) == QImode
4665 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4666 && subreg_lowpart_p (XEXP (*op0, 0))
4667 && *op1 == const0_rtx)
4668 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4669 GEN_INT (255));
4671 /* Comparisons smaller than DImode. Only adjust comparisons against
4672 an out-of-range constant. */
4673 if (!CONST_INT_P (*op1)
4674 || const_ok_for_arm (INTVAL (*op1))
4675 || const_ok_for_arm (- INTVAL (*op1)))
4676 return;
4678 i = INTVAL (*op1);
4680 switch (*code)
4682 case EQ:
4683 case NE:
4684 return;
4686 case GT:
4687 case LE:
4688 if (i != maxval
4689 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4691 *op1 = GEN_INT (i + 1);
4692 *code = *code == GT ? GE : LT;
4693 return;
4695 break;
4697 case GE:
4698 case LT:
4699 if (i != ~maxval
4700 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4702 *op1 = GEN_INT (i - 1);
4703 *code = *code == GE ? GT : LE;
4704 return;
4706 break;
4708 case GTU:
4709 case LEU:
4710 if (i != ~((unsigned HOST_WIDE_INT) 0)
4711 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4713 *op1 = GEN_INT (i + 1);
4714 *code = *code == GTU ? GEU : LTU;
4715 return;
4717 break;
4719 case GEU:
4720 case LTU:
4721 if (i != 0
4722 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4724 *op1 = GEN_INT (i - 1);
4725 *code = *code == GEU ? GTU : LEU;
4726 return;
4728 break;
4730 default:
4731 gcc_unreachable ();
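/* Example of the SImode adjustment above (illustrative): for the
   comparison (GT x 511) neither 511 nor -511 is a valid immediate, but
   512 is, so the comparison is rewritten as (GE x 512); the unsigned
   and DImode cases apply the same i -> i+1 / i -> i-1 trick with
   GEU/LTU and friends.  */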
4736 /* Define how to find the value returned by a function. */
4738 static rtx
4739 arm_function_value(const_tree type, const_tree func,
4740 bool outgoing ATTRIBUTE_UNUSED)
4742 machine_mode mode;
4743 int unsignedp ATTRIBUTE_UNUSED;
4744 rtx r ATTRIBUTE_UNUSED;
4746 mode = TYPE_MODE (type);
4748 if (TARGET_AAPCS_BASED)
4749 return aapcs_allocate_return_reg (mode, type, func);
4751 /* Promote integer types. */
4752 if (INTEGRAL_TYPE_P (type))
4753 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4755 /* Promotes small structs returned in a register to full-word size
4756 for big-endian AAPCS. */
4757 if (arm_return_in_msb (type))
4759 HOST_WIDE_INT size = int_size_in_bytes (type);
4760 if (size % UNITS_PER_WORD != 0)
4762 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4763 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4767 return arm_libcall_value_1 (mode);
4770 /* libcall hashtable helpers. */
4772 struct libcall_hasher : typed_noop_remove <rtx_def>
4774 typedef rtx_def value_type;
4775 typedef rtx_def compare_type;
4776 static inline hashval_t hash (const value_type *);
4777 static inline bool equal (const value_type *, const compare_type *);
4778 static inline void remove (value_type *);
4781 inline bool
4782 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4784 return rtx_equal_p (p1, p2);
4787 inline hashval_t
4788 libcall_hasher::hash (const value_type *p1)
4790 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4793 typedef hash_table<libcall_hasher> libcall_table_type;
4795 static void
4796 add_libcall (libcall_table_type *htab, rtx libcall)
4798 *htab->find_slot (libcall, INSERT) = libcall;
4801 static bool
4802 arm_libcall_uses_aapcs_base (const_rtx libcall)
4804 static bool init_done = false;
4805 static libcall_table_type *libcall_htab = NULL;
4807 if (!init_done)
4809 init_done = true;
4811 libcall_htab = new libcall_table_type (31);
4812 add_libcall (libcall_htab,
4813 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4814 add_libcall (libcall_htab,
4815 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4816 add_libcall (libcall_htab,
4817 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4818 add_libcall (libcall_htab,
4819 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4838 add_libcall (libcall_htab,
4839 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4840 add_libcall (libcall_htab,
4841 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4842 add_libcall (libcall_htab,
4843 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4844 add_libcall (libcall_htab,
4845 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4847 /* Values from double-precision helper functions are returned in core
4848 registers if the selected core only supports single-precision
4849 arithmetic, even if we are using the hard-float ABI. The same is
4850 true for single-precision helpers, but we will never be using the
4851 hard-float ABI on a CPU which doesn't support single-precision
4852 operations in hardware. */
4853 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4854 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4855 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4856 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4857 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4858 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4859 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4860 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4861 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4864 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4865 SFmode));
4866 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4867 DFmode));
4870 return libcall && libcall_htab->find (libcall) != NULL;
4873 static rtx
4874 arm_libcall_value_1 (machine_mode mode)
4876 if (TARGET_AAPCS_BASED)
4877 return aapcs_libcall_value (mode);
4878 else if (TARGET_IWMMXT_ABI
4879 && arm_vector_mode_supported_p (mode))
4880 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4881 else
4882 return gen_rtx_REG (mode, ARG_REGISTER (1));
4885 /* Define how to find the value returned by a library function
4886 assuming the value has mode MODE. */
4888 static rtx
4889 arm_libcall_value (machine_mode mode, const_rtx libcall)
4891 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4892 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4894 /* The following libcalls return their result in integer registers,
4895 even though they return a floating point value. */
4896 if (arm_libcall_uses_aapcs_base (libcall))
4897 return gen_rtx_REG (mode, ARG_REGISTER(1));
4901 return arm_libcall_value_1 (mode);
4904 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4906 static bool
4907 arm_function_value_regno_p (const unsigned int regno)
4909 if (regno == ARG_REGISTER (1)
4910 || (TARGET_32BIT
4911 && TARGET_AAPCS_BASED
4912 && TARGET_VFP
4913 && TARGET_HARD_FLOAT
4914 && regno == FIRST_VFP_REGNUM)
4915 || (TARGET_IWMMXT_ABI
4916 && regno == FIRST_IWMMXT_REGNUM))
4917 return true;
4919 return false;
4922 /* Determine the amount of memory needed to store the possible return
4923 registers of an untyped call. */
4925 arm_apply_result_size (void)
4927 int size = 16;
4929 if (TARGET_32BIT)
4931 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4932 size += 32;
4933 if (TARGET_IWMMXT_ABI)
4934 size += 8;
4937 return size;
4940 /* Decide whether TYPE should be returned in memory (true)
4941 or in a register (false). FNTYPE is the type of the function making
4942 the call. */
4943 static bool
4944 arm_return_in_memory (const_tree type, const_tree fntype)
4946 HOST_WIDE_INT size;
4948 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4950 if (TARGET_AAPCS_BASED)
4952 /* Simple, non-aggregate types (i.e. not including vectors and
4953 complex) are always returned in a register (or registers).
4954 We don't care about which register here, so we can short-cut
4955 some of the detail. */
4956 if (!AGGREGATE_TYPE_P (type)
4957 && TREE_CODE (type) != VECTOR_TYPE
4958 && TREE_CODE (type) != COMPLEX_TYPE)
4959 return false;
4961 /* Any return value that is no larger than one word can be
4962 returned in r0. */
4963 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4964 return false;
4966 /* Check any available co-processors to see if they accept the
4967 type as a register candidate (VFP, for example, can return
4968 some aggregates in consecutive registers). These aren't
4969 available if the call is variadic. */
4970 if (aapcs_select_return_coproc (type, fntype) >= 0)
4971 return false;
4973 /* Vector values should be returned using ARM registers, not
4974 memory (unless they're over 16 bytes, which will break since
4975 we only have four call-clobbered registers to play with). */
4976 if (TREE_CODE (type) == VECTOR_TYPE)
4977 return (size < 0 || size > (4 * UNITS_PER_WORD));
4979 /* The rest go in memory. */
4980 return true;
4983 if (TREE_CODE (type) == VECTOR_TYPE)
4984 return (size < 0 || size > (4 * UNITS_PER_WORD));
4986 if (!AGGREGATE_TYPE_P (type) &&
4987 (TREE_CODE (type) != VECTOR_TYPE))
4988 /* All simple types are returned in registers. */
4989 return false;
4991 if (arm_abi != ARM_ABI_APCS)
4993 /* ATPCS and later return aggregate types in memory only if they are
4994 larger than a word (or are variable size). */
4995 return (size < 0 || size > UNITS_PER_WORD);
4998 /* For the arm-wince targets we choose to be compatible with Microsoft's
4999 ARM and Thumb compilers, which always return aggregates in memory. */
5000 #ifndef ARM_WINCE
5001 /* All structures/unions bigger than one word are returned in memory.
5002 Also catch the case where int_size_in_bytes returns -1. In this case
5003 the aggregate is either huge or of variable size, and in either case
5004 we will want to return it via memory and not in a register. */
5005 if (size < 0 || size > UNITS_PER_WORD)
5006 return true;
5008 if (TREE_CODE (type) == RECORD_TYPE)
5010 tree field;
5012 /* For a struct the APCS says that we only return in a register
5013 if the type is 'integer like' and every addressable element
5014 has an offset of zero. For practical purposes this means
5015 that the structure can have at most one non bit-field element
5016 and that this element must be the first one in the structure. */
5018 /* Find the first field, ignoring non FIELD_DECL things which will
5019 have been created by C++. */
5020 for (field = TYPE_FIELDS (type);
5021 field && TREE_CODE (field) != FIELD_DECL;
5022 field = DECL_CHAIN (field))
5023 continue;
5025 if (field == NULL)
5026 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5028 /* Check that the first field is valid for returning in a register. */
5030 /* ... Floats are not allowed */
5031 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5032 return true;
5034 /* ... Aggregates that are not themselves valid for returning in
5035 a register are not allowed. */
5036 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5037 return true;
5039 /* Now check the remaining fields, if any. Only bitfields are allowed,
5040 since they are not addressable. */
5041 for (field = DECL_CHAIN (field);
5042 field;
5043 field = DECL_CHAIN (field))
5045 if (TREE_CODE (field) != FIELD_DECL)
5046 continue;
5048 if (!DECL_BIT_FIELD_TYPE (field))
5049 return true;
5052 return false;
5055 if (TREE_CODE (type) == UNION_TYPE)
5057 tree field;
5059 /* Unions can be returned in registers if every element is
5060 integral, or can be returned in an integer register. */
5061 for (field = TYPE_FIELDS (type);
5062 field;
5063 field = DECL_CHAIN (field))
5065 if (TREE_CODE (field) != FIELD_DECL)
5066 continue;
5068 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5069 return true;
5071 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5072 return true;
5075 return false;
5077 #endif /* not ARM_WINCE */
5079 /* Return all other types in memory. */
5080 return true;
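/* A few illustrative outcomes of the rules above (assuming the usual
   4-byte UNITS_PER_WORD): under AAPCS an int or a 4-byte struct comes
   back in r0, an 8-byte struct of two ints is returned in memory, and a
   struct of two floats may still come back in registers when the VFP
   co-processor return rules accept it; under the old APCS rules a
   struct whose first field is a float is always returned in memory.  */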
5083 const struct pcs_attribute_arg
5085 const char *arg;
5086 enum arm_pcs value;
5087 } pcs_attribute_args[] =
5089 {"aapcs", ARM_PCS_AAPCS},
5090 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5091 #if 0
5092 /* We could recognize these, but changes would be needed elsewhere
5093 * to implement them. */
5094 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5095 {"atpcs", ARM_PCS_ATPCS},
5096 {"apcs", ARM_PCS_APCS},
5097 #endif
5098 {NULL, ARM_PCS_UNKNOWN}
5101 static enum arm_pcs
5102 arm_pcs_from_attribute (tree attr)
5104 const struct pcs_attribute_arg *ptr;
5105 const char *arg;
5107 /* Get the value of the argument. */
5108 if (TREE_VALUE (attr) == NULL_TREE
5109 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5110 return ARM_PCS_UNKNOWN;
5112 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5114 /* Check it against the list of known arguments. */
5115 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5116 if (streq (arg, ptr->arg))
5117 return ptr->value;
5119 /* An unrecognized PCS variant. */
5120 return ARM_PCS_UNKNOWN;
5123 /* Get the PCS variant to use for this call. TYPE is the function's type
5124 specification, DECL is the specific declaration. DECL may be null if
5125 the call could be indirect or if this is a library call. */
5126 static enum arm_pcs
5127 arm_get_pcs_model (const_tree type, const_tree decl)
5129 bool user_convention = false;
5130 enum arm_pcs user_pcs = arm_pcs_default;
5131 tree attr;
5133 gcc_assert (type);
5135 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5136 if (attr)
5138 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5139 user_convention = true;
5142 if (TARGET_AAPCS_BASED)
5144 /* Detect varargs functions. These always use the base rules
5145 (no argument is ever a candidate for a co-processor
5146 register). */
5147 bool base_rules = stdarg_p (type);
5149 if (user_convention)
5151 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5152 sorry ("non-AAPCS derived PCS variant");
5153 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5154 error ("variadic functions must use the base AAPCS variant");
5157 if (base_rules)
5158 return ARM_PCS_AAPCS;
5159 else if (user_convention)
5160 return user_pcs;
5161 else if (decl && flag_unit_at_a_time)
5163 /* Local functions never leak outside this compilation unit,
5164 so we are free to use whatever conventions are
5165 appropriate. */
5166 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5167 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5168 if (i && i->local)
5169 return ARM_PCS_AAPCS_LOCAL;
5172 else if (user_convention && user_pcs != arm_pcs_default)
5173 sorry ("PCS variant");
5175 /* For everything else we use the target's default. */
5176 return arm_pcs_default;
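/* A sketch of how the attribute reaches this code: a declaration such
   as

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   makes arm_pcs_from_attribute return ARM_PCS_AAPCS_VFP, so calls to it
   pass and return floating-point values in VFP registers even when the
   default for the unit is the base (soft-float) AAPCS variant; a
   variadic function carrying the same attribute is diagnosed above
   instead.  */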
5180 static void
5181 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5182 const_tree fntype ATTRIBUTE_UNUSED,
5183 rtx libcall ATTRIBUTE_UNUSED,
5184 const_tree fndecl ATTRIBUTE_UNUSED)
5186 /* Record the unallocated VFP registers. */
5187 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5188 pcum->aapcs_vfp_reg_alloc = 0;
5191 /* Walk down the type tree of TYPE counting consecutive base elements.
5192 If *MODEP is VOIDmode, then set it to the first valid floating point
5193 type. If a non-floating point type is found, or if a floating point
5194 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5195 otherwise return the count in the sub-tree. */
5196 static int
5197 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5199 machine_mode mode;
5200 HOST_WIDE_INT size;
5202 switch (TREE_CODE (type))
5204 case REAL_TYPE:
5205 mode = TYPE_MODE (type);
5206 if (mode != DFmode && mode != SFmode)
5207 return -1;
5209 if (*modep == VOIDmode)
5210 *modep = mode;
5212 if (*modep == mode)
5213 return 1;
5215 break;
5217 case COMPLEX_TYPE:
5218 mode = TYPE_MODE (TREE_TYPE (type));
5219 if (mode != DFmode && mode != SFmode)
5220 return -1;
5222 if (*modep == VOIDmode)
5223 *modep = mode;
5225 if (*modep == mode)
5226 return 2;
5228 break;
5230 case VECTOR_TYPE:
5231 /* Use V2SImode and V4SImode as representatives of all 64-bit
5232 and 128-bit vector types, whether or not those modes are
5233 supported with the present options. */
5234 size = int_size_in_bytes (type);
5235 switch (size)
5237 case 8:
5238 mode = V2SImode;
5239 break;
5240 case 16:
5241 mode = V4SImode;
5242 break;
5243 default:
5244 return -1;
5247 if (*modep == VOIDmode)
5248 *modep = mode;
5250 /* Vector modes are considered to be opaque: two vectors are
5251 equivalent for the purposes of being homogeneous aggregates
5252 if they are the same size. */
5253 if (*modep == mode)
5254 return 1;
5256 break;
5258 case ARRAY_TYPE:
5260 int count;
5261 tree index = TYPE_DOMAIN (type);
5263 /* Can't handle incomplete types nor sizes that are not
5264 fixed. */
5265 if (!COMPLETE_TYPE_P (type)
5266 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5267 return -1;
5269 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5270 if (count == -1
5271 || !index
5272 || !TYPE_MAX_VALUE (index)
5273 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5274 || !TYPE_MIN_VALUE (index)
5275 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5276 || count < 0)
5277 return -1;
5279 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5280 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5282 /* There must be no padding. */
5283 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5284 return -1;
5286 return count;
5289 case RECORD_TYPE:
5291 int count = 0;
5292 int sub_count;
5293 tree field;
5295 /* Can't handle incomplete types nor sizes that are not
5296 fixed. */
5297 if (!COMPLETE_TYPE_P (type)
5298 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5299 return -1;
5301 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5303 if (TREE_CODE (field) != FIELD_DECL)
5304 continue;
5306 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5307 if (sub_count < 0)
5308 return -1;
5309 count += sub_count;
5312 /* There must be no padding. */
5313 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5314 return -1;
5316 return count;
5319 case UNION_TYPE:
5320 case QUAL_UNION_TYPE:
5322 /* These aren't very interesting except in a degenerate case. */
5323 int count = 0;
5324 int sub_count;
5325 tree field;
5327 /* Can't handle incomplete types nor sizes that are not
5328 fixed. */
5329 if (!COMPLETE_TYPE_P (type)
5330 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5331 return -1;
5333 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5335 if (TREE_CODE (field) != FIELD_DECL)
5336 continue;
5338 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5339 if (sub_count < 0)
5340 return -1;
5341 count = count > sub_count ? count : sub_count;
5344 /* There must be no padding. */
5345 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5346 return -1;
5348 return count;
5351 default:
5352 break;
5355 return -1;
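/* Examples of what the walk above computes (illustrative only):
     struct { double x, y; }     -> *modep = DFmode, count 2
     struct { float v[4]; }      -> *modep = SFmode, count 4
     _Complex double             -> *modep = DFmode, count 2
     struct { float f; int i; }  -> -1 (mixed element types)
     struct { double d[5]; }     -> count 5, which the caller then
                                    rejects since only 1..4 elements
                                    form a valid candidate.  */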
5358 /* Return true if PCS_VARIANT should use VFP registers. */
5359 static bool
5360 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5362 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5364 static bool seen_thumb1_vfp = false;
5366 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5368 sorry ("Thumb-1 hard-float VFP ABI");
5369 /* sorry() is not immediately fatal, so only display this once. */
5370 seen_thumb1_vfp = true;
5373 return true;
5376 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5377 return false;
5379 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5380 (TARGET_VFP_DOUBLE || !is_double));
5383 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5384 suitable for passing or returning in VFP registers for the PCS
5385 variant selected. If it is, then *BASE_MODE is updated to contain
5386 a machine mode describing each element of the argument's type and
5387 *COUNT to hold the number of such elements. */
5388 static bool
5389 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5390 machine_mode mode, const_tree type,
5391 machine_mode *base_mode, int *count)
5393 machine_mode new_mode = VOIDmode;
5395 /* If we have the type information, prefer that to working things
5396 out from the mode. */
5397 if (type)
5399 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5401 if (ag_count > 0 && ag_count <= 4)
5402 *count = ag_count;
5403 else
5404 return false;
5406 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5407 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5408 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5410 *count = 1;
5411 new_mode = mode;
5413 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5415 *count = 2;
5416 new_mode = (mode == DCmode ? DFmode : SFmode);
5418 else
5419 return false;
5422 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5423 return false;
5425 *base_mode = new_mode;
5426 return true;
5429 static bool
5430 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5431 machine_mode mode, const_tree type)
5433 int count ATTRIBUTE_UNUSED;
5434 machine_mode ag_mode ATTRIBUTE_UNUSED;
5436 if (!use_vfp_abi (pcs_variant, false))
5437 return false;
5438 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5439 &ag_mode, &count);
5442 static bool
5443 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5444 const_tree type)
5446 if (!use_vfp_abi (pcum->pcs_variant, false))
5447 return false;
5449 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5450 &pcum->aapcs_vfp_rmode,
5451 &pcum->aapcs_vfp_rcount);
5454 static bool
5455 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5456 const_tree type ATTRIBUTE_UNUSED)
5458 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5459 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5460 int regno;
5462 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5463 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5465 pcum->aapcs_vfp_reg_alloc = mask << regno;
5466 if (mode == BLKmode
5467 || (mode == TImode && ! TARGET_NEON)
5468 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5470 int i;
5471 int rcount = pcum->aapcs_vfp_rcount;
5472 int rshift = shift;
5473 machine_mode rmode = pcum->aapcs_vfp_rmode;
5474 rtx par;
5475 if (!TARGET_NEON)
5477 /* Avoid using unsupported vector modes. */
5478 if (rmode == V2SImode)
5479 rmode = DImode;
5480 else if (rmode == V4SImode)
5482 rmode = DImode;
5483 rcount *= 2;
5484 rshift /= 2;
5487 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5488 for (i = 0; i < rcount; i++)
5490 rtx tmp = gen_rtx_REG (rmode,
5491 FIRST_VFP_REGNUM + regno + i * rshift);
5492 tmp = gen_rtx_EXPR_LIST
5493 (VOIDmode, tmp,
5494 GEN_INT (i * GET_MODE_SIZE (rmode)));
5495 XVECEXP (par, 0, i) = tmp;
5498 pcum->aapcs_reg = par;
5500 else
5501 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5502 return true;
5504 return false;
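/* Allocation sketch for the code above: a homogeneous aggregate of two
   doubles arrives with aapcs_vfp_rmode == DFmode and
   aapcs_vfp_rcount == 2, so shift is 2 and the mask spans four S
   registers; the first suitably aligned free block (s0-s3, i.e. d0-d1,
   if nothing has been allocated yet) is claimed by setting those bits
   in aapcs_vfp_reg_alloc, and an argument that finds no such block is
   left for the caller to place on the stack.  */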
5507 static rtx
5508 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5509 machine_mode mode,
5510 const_tree type ATTRIBUTE_UNUSED)
5512 if (!use_vfp_abi (pcs_variant, false))
5513 return NULL;
5515 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5517 int count;
5518 machine_mode ag_mode;
5519 int i;
5520 rtx par;
5521 int shift;
5523 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5524 &ag_mode, &count);
5526 if (!TARGET_NEON)
5528 if (ag_mode == V2SImode)
5529 ag_mode = DImode;
5530 else if (ag_mode == V4SImode)
5532 ag_mode = DImode;
5533 count *= 2;
5536 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5537 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5538 for (i = 0; i < count; i++)
5540 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5541 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5542 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5543 XVECEXP (par, 0, i) = tmp;
5546 return par;
5549 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5552 static void
5553 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5554 machine_mode mode ATTRIBUTE_UNUSED,
5555 const_tree type ATTRIBUTE_UNUSED)
5557 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5558 pcum->aapcs_vfp_reg_alloc = 0;
5559 return;
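/* Expand to the set of hooks for co-processor X, in the order expected
   by the aapcs_cp_arg_layout table below.  */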
5562 #define AAPCS_CP(X) \
5564 aapcs_ ## X ## _cum_init, \
5565 aapcs_ ## X ## _is_call_candidate, \
5566 aapcs_ ## X ## _allocate, \
5567 aapcs_ ## X ## _is_return_candidate, \
5568 aapcs_ ## X ## _allocate_return_reg, \
5569 aapcs_ ## X ## _advance \
5572 /* Table of co-processors that can be used to pass arguments in
5573 registers. Ideally no argument should be a candidate for more than
5574 one co-processor table entry, but the table is processed in order
5575 and stops after the first match. If that entry then fails to put
5576 the argument into a co-processor register, the argument will go on
5577 the stack. */
5578 static struct
5580 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5581 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5583 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5584 BLKmode) is a candidate for this co-processor's registers; this
5585 function should ignore any position-dependent state in
5586 CUMULATIVE_ARGS and only use call-type dependent information. */
5587 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5589 /* Return true if the argument does get a co-processor register; it
5590 should set aapcs_reg to an RTX of the register allocated, as is
5591 required for a return from FUNCTION_ARG. */
5592 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5594 /* Return true if a result of mode MODE (or type TYPE if MODE is
5595 BLKmode) can be returned in this co-processor's registers. */
5596 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5598 /* Allocate and return an RTX element to hold the return type of a
5599 call; this routine must not fail and will only be called if
5600 is_return_candidate returned true with the same parameters. */
5601 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5603 /* Finish processing this argument and prepare to start processing
5604 the next one. */
5605 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5606 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5608 AAPCS_CP(vfp)
5611 #undef AAPCS_CP
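/* Return the index of the co-processor that claims the argument of mode
   MODE and type TYPE described by PCUM, or -1 if no co-processor does.  */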
5613 static int
5614 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5615 const_tree type)
5617 int i;
5619 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5620 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5621 return i;
5623 return -1;
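/* Return the index of the co-processor that should be used to return a
   value of type TYPE from a function of type (or decl) FNTYPE, or -1 if
   no co-processor should be used.  */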
5626 static int
5627 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5629 /* We aren't passed a decl, so we can't check that a call is local.
5630 However, it isn't clear that that would be a win anyway, since it
5631 might limit some tail-calling opportunities. */
5632 enum arm_pcs pcs_variant;
5634 if (fntype)
5636 const_tree fndecl = NULL_TREE;
5638 if (TREE_CODE (fntype) == FUNCTION_DECL)
5640 fndecl = fntype;
5641 fntype = TREE_TYPE (fntype);
5644 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5646 else
5647 pcs_variant = arm_pcs_default;
5649 if (pcs_variant != ARM_PCS_AAPCS)
5651 int i;
5653 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5654 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5655 TYPE_MODE (type),
5656 type))
5657 return i;
5659 return -1;
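/* Return an RTX describing the register (or set of registers) in which
   a result of mode MODE and type TYPE is returned from a function of
   type (or decl) FNTYPE.  */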
5662 static rtx
5663 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5664 const_tree fntype)
5666 /* We aren't passed a decl, so we can't check that a call is local.
5667 However, it isn't clear that that would be a win anyway, since it
5668 might limit some tail-calling opportunities. */
5669 enum arm_pcs pcs_variant;
5670 int unsignedp ATTRIBUTE_UNUSED;
5672 if (fntype)
5674 const_tree fndecl = NULL_TREE;
5676 if (TREE_CODE (fntype) == FUNCTION_DECL)
5678 fndecl = fntype;
5679 fntype = TREE_TYPE (fntype);
5682 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5684 else
5685 pcs_variant = arm_pcs_default;
5687 /* Promote integer types. */
5688 if (type && INTEGRAL_TYPE_P (type))
5689 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5691 if (pcs_variant != ARM_PCS_AAPCS)
5693 int i;
5695 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5696 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5697 type))
5698 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5699 mode, type);
5702 /* Promotes small structs returned in a register to full-word size
5703 for big-endian AAPCS. */
5704 if (type && arm_return_in_msb (type))
5706 HOST_WIDE_INT size = int_size_in_bytes (type);
5707 if (size % UNITS_PER_WORD != 0)
5709 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5710 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5714 return gen_rtx_REG (mode, R0_REGNUM);
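/* Return an RTX describing where a libcall result of mode MODE is
   returned under the AAPCS.  */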
5717 static rtx
5718 aapcs_libcall_value (machine_mode mode)
5720 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5721 && GET_MODE_SIZE (mode) <= 4)
5722 mode = SImode;
5724 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5727 /* Lay out a function argument using the AAPCS rules. The rule
5728 numbers referred to here are those in the AAPCS. */
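/* For example, with the base (soft-float) variant a call f (int a,
   double b, int c) is laid out as follows: A is allocated to r0 (C4);
   B needs doubleword alignment, so the NCRN is rounded up to 2 (C3) and
   B is allocated to r2-r3 (C4); no core registers remain, so C is
   placed on the stack (C6, C7/C8).  */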
5729 static void
5730 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5731 const_tree type, bool named)
5733 int nregs, nregs2;
5734 int ncrn;
5736 /* We only need to do this once per argument. */
5737 if (pcum->aapcs_arg_processed)
5738 return;
5740 pcum->aapcs_arg_processed = true;
5742 /* Special case: if named is false then we are handling an incoming
5743 anonymous argument which is on the stack. */
5744 if (!named)
5745 return;
5747 /* Is this a potential co-processor register candidate? */
5748 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5750 int slot = aapcs_select_call_coproc (pcum, mode, type);
5751 pcum->aapcs_cprc_slot = slot;
5753 /* We don't have to apply any of the rules from part B of the
5754 preparation phase; these are handled elsewhere in the
5755 compiler. */
5757 if (slot >= 0)
5759 /* A Co-processor register candidate goes either in its own
5760 class of registers or on the stack. */
5761 if (!pcum->aapcs_cprc_failed[slot])
5763 /* C1.cp - Try to allocate the argument to co-processor
5764 registers. */
5765 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5766 return;
5768 /* C2.cp - Put the argument on the stack and note that we
5769 can't assign any more candidates in this slot. We also
5770 need to note that we have allocated stack space, so that
5771 we won't later try to split a non-cprc candidate between
5772 core registers and the stack. */
5773 pcum->aapcs_cprc_failed[slot] = true;
5774 pcum->can_split = false;
5777 /* We didn't get a register, so this argument goes on the
5778 stack. */
5779 gcc_assert (pcum->can_split == false);
5780 return;
5784 /* C3 - For double-word aligned arguments, round the NCRN up to the
5785 next even number. */
5786 ncrn = pcum->aapcs_ncrn;
5787 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5788 ncrn++;
5790 nregs = ARM_NUM_REGS2(mode, type);
5792 /* Sigh, this test should really assert that nregs > 0, but a GCC
5793 extension allows empty structs and then gives them empty size; it
5794 then allows such a structure to be passed by value. For some of
5795 the code below we have to pretend that such an argument has
5796 non-zero size so that we 'locate' it correctly either in
5797 registers or on the stack. */
5798 gcc_assert (nregs >= 0);
5800 nregs2 = nregs ? nregs : 1;
5802 /* C4 - Argument fits entirely in core registers. */
5803 if (ncrn + nregs2 <= NUM_ARG_REGS)
5805 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5806 pcum->aapcs_next_ncrn = ncrn + nregs;
5807 return;
5810 /* C5 - Some core registers left and there are no arguments already
5811 on the stack: split this argument between the remaining core
5812 registers and the stack. */
5813 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5815 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5816 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5817 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5818 return;
5821 /* C6 - NCRN is set to 4. */
5822 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5824 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5825 return;
5828 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5829 for a call to a function whose data type is FNTYPE.
5830 For a library call, FNTYPE is NULL. */
5831 void
5832 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5833 rtx libname,
5834 tree fndecl ATTRIBUTE_UNUSED)
5836 /* Long call handling. */
5837 if (fntype)
5838 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5839 else
5840 pcum->pcs_variant = arm_pcs_default;
5842 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5844 if (arm_libcall_uses_aapcs_base (libname))
5845 pcum->pcs_variant = ARM_PCS_AAPCS;
5847 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5848 pcum->aapcs_reg = NULL_RTX;
5849 pcum->aapcs_partial = 0;
5850 pcum->aapcs_arg_processed = false;
5851 pcum->aapcs_cprc_slot = -1;
5852 pcum->can_split = true;
5854 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5856 int i;
5858 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5860 pcum->aapcs_cprc_failed[i] = false;
5861 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5864 return;
5867 /* Legacy ABIs */
5869 /* On the ARM, the offset starts at 0. */
5870 pcum->nregs = 0;
5871 pcum->iwmmxt_nregs = 0;
5872 pcum->can_split = true;
5874 /* Varargs vectors are treated the same as long long.
5875 named_count avoids having to change the way arm handles 'named' */
5876 pcum->named_count = 0;
5877 pcum->nargs = 0;
5879 if (TARGET_REALLY_IWMMXT && fntype)
5881 tree fn_arg;
5883 for (fn_arg = TYPE_ARG_TYPES (fntype);
5884 fn_arg;
5885 fn_arg = TREE_CHAIN (fn_arg))
5886 pcum->named_count += 1;
5888 if (! pcum->named_count)
5889 pcum->named_count = INT_MAX;
5893 /* Return true if we use LRA instead of reload pass. */
5894 static bool
5895 arm_lra_p (void)
5897 return arm_lra_flag;
5900 /* Return true if mode/type need doubleword alignment. */
5901 static bool
5902 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5904 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5905 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5909 /* Determine where to put an argument to a function.
5910 Value is zero to push the argument on the stack,
5911 or a hard register in which to store the argument.
5913 MODE is the argument's machine mode.
5914 TYPE is the data type of the argument (as a tree).
5915 This is null for libcalls where that information may
5916 not be available.
5917 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5918 the preceding args and about the function being called.
5919 NAMED is nonzero if this argument is a named parameter
5920 (otherwise it is an extra parameter matching an ellipsis).
5922 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5923 other arguments are passed on the stack. If (NAMED == 0) (which happens
5924 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5925 defined), say it is passed on the stack (function_prologue will
5926 indeed make it be passed on the stack if necessary). */
5928 static rtx
5929 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5930 const_tree type, bool named)
5932 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5933 int nregs;
5935 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5936 a call insn (op3 of a call_value insn). */
5937 if (mode == VOIDmode)
5938 return const0_rtx;
5940 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5942 aapcs_layout_arg (pcum, mode, type, named);
5943 return pcum->aapcs_reg;
5946 /* Varargs vectors are treated the same as long long.
5947 named_count avoids having to change the way arm handles 'named' */
5948 if (TARGET_IWMMXT_ABI
5949 && arm_vector_mode_supported_p (mode)
5950 && pcum->named_count > pcum->nargs + 1)
5952 if (pcum->iwmmxt_nregs <= 9)
5953 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5954 else
5956 pcum->can_split = false;
5957 return NULL_RTX;
5961 /* Put doubleword aligned quantities in even register pairs. */
5962 if (pcum->nregs & 1
5963 && ARM_DOUBLEWORD_ALIGN
5964 && arm_needs_doubleword_align (mode, type))
5965 pcum->nregs++;
5967 /* Only allow splitting an arg between regs and memory if all preceding
5968 args were allocated to regs. For args passed by reference we only count
5969 the reference pointer. */
5970 if (pcum->can_split)
5971 nregs = 1;
5972 else
5973 nregs = ARM_NUM_REGS2 (mode, type);
5975 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5976 return NULL_RTX;
5978 return gen_rtx_REG (mode, pcum->nregs);
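/* Return the alignment, in bits, required for an argument of mode MODE
   and type TYPE: DOUBLEWORD_ALIGNMENT if the argument needs doubleword
   alignment, PARM_BOUNDARY otherwise.  */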
5981 static unsigned int
5982 arm_function_arg_boundary (machine_mode mode, const_tree type)
5984 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5985 ? DOUBLEWORD_ALIGNMENT
5986 : PARM_BOUNDARY);
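/* Return the number of bytes of an argument of mode MODE and type TYPE
   that are passed in registers when the remainder of the argument has
   to go on the stack, or zero if no such split is needed.  */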
5989 static int
5990 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
5991 tree type, bool named)
5993 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5994 int nregs = pcum->nregs;
5996 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5998 aapcs_layout_arg (pcum, mode, type, named);
5999 return pcum->aapcs_partial;
6002 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6003 return 0;
6005 if (NUM_ARG_REGS > nregs
6006 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6007 && pcum->can_split)
6008 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6010 return 0;
6013 /* Update the data in PCUM to advance over an argument
6014 of mode MODE and data type TYPE.
6015 (TYPE is null for libcalls where that information may not be available.) */
6017 static void
6018 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6019 const_tree type, bool named)
6021 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6023 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6025 aapcs_layout_arg (pcum, mode, type, named);
6027 if (pcum->aapcs_cprc_slot >= 0)
6029 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6030 type);
6031 pcum->aapcs_cprc_slot = -1;
6034 /* Generic stuff. */
6035 pcum->aapcs_arg_processed = false;
6036 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6037 pcum->aapcs_reg = NULL_RTX;
6038 pcum->aapcs_partial = 0;
6040 else
6042 pcum->nargs += 1;
6043 if (arm_vector_mode_supported_p (mode)
6044 && pcum->named_count > pcum->nargs
6045 && TARGET_IWMMXT_ABI)
6046 pcum->iwmmxt_nregs += 1;
6047 else
6048 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6052 /* Variable sized types are passed by reference. This is a GCC
6053 extension to the ARM ABI. */
6055 static bool
6056 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6057 machine_mode mode ATTRIBUTE_UNUSED,
6058 const_tree type, bool named ATTRIBUTE_UNUSED)
6060 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6063 /* Encode the current state of the #pragma [no_]long_calls. */
6064 typedef enum
6066 OFF, /* No #pragma [no_]long_calls is in effect. */
6067 LONG, /* #pragma long_calls is in effect. */
6068 SHORT /* #pragma no_long_calls is in effect. */
6069 } arm_pragma_enum;
6071 static arm_pragma_enum arm_pragma_long_calls = OFF;
6073 void
6074 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6076 arm_pragma_long_calls = LONG;
6079 void
6080 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6082 arm_pragma_long_calls = SHORT;
6085 void
6086 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6088 arm_pragma_long_calls = OFF;
6091 /* Handle an attribute requiring a FUNCTION_DECL;
6092 arguments as in struct attribute_spec.handler. */
6093 static tree
6094 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6095 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6097 if (TREE_CODE (*node) != FUNCTION_DECL)
6099 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6100 name);
6101 *no_add_attrs = true;
6104 return NULL_TREE;
6107 /* Handle an "interrupt" or "isr" attribute;
6108 arguments as in struct attribute_spec.handler. */
6109 static tree
6110 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6111 bool *no_add_attrs)
6113 if (DECL_P (*node))
6115 if (TREE_CODE (*node) != FUNCTION_DECL)
6117 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6118 name);
6119 *no_add_attrs = true;
6121 /* FIXME: the argument if any is checked for type attributes;
6122 should it be checked for decl ones? */
6124 else
6126 if (TREE_CODE (*node) == FUNCTION_TYPE
6127 || TREE_CODE (*node) == METHOD_TYPE)
6129 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6131 warning (OPT_Wattributes, "%qE attribute ignored",
6132 name);
6133 *no_add_attrs = true;
6136 else if (TREE_CODE (*node) == POINTER_TYPE
6137 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6138 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6139 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6141 *node = build_variant_type_copy (*node);
6142 TREE_TYPE (*node) = build_type_attribute_variant
6143 (TREE_TYPE (*node),
6144 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6145 *no_add_attrs = true;
6147 else
6149 /* Possibly pass this attribute on from the type to a decl. */
6150 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6151 | (int) ATTR_FLAG_FUNCTION_NEXT
6152 | (int) ATTR_FLAG_ARRAY_NEXT))
6154 *no_add_attrs = true;
6155 return tree_cons (name, args, NULL_TREE);
6157 else
6159 warning (OPT_Wattributes, "%qE attribute ignored",
6160 name);
6165 return NULL_TREE;
6168 /* Handle a "pcs" attribute; arguments as in struct
6169 attribute_spec.handler. */
6170 static tree
6171 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6172 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6174 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6176 warning (OPT_Wattributes, "%qE attribute ignored", name);
6177 *no_add_attrs = true;
6179 return NULL_TREE;
6182 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6183 /* Handle the "notshared" attribute. This attribute is another way of
6184 requesting hidden visibility. ARM's compiler supports
6185 "__declspec(notshared)"; we support the same thing via an
6186 attribute. */
6188 static tree
6189 arm_handle_notshared_attribute (tree *node,
6190 tree name ATTRIBUTE_UNUSED,
6191 tree args ATTRIBUTE_UNUSED,
6192 int flags ATTRIBUTE_UNUSED,
6193 bool *no_add_attrs)
6195 tree decl = TYPE_NAME (*node);
6197 if (decl)
6199 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6200 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6201 *no_add_attrs = false;
6203 return NULL_TREE;
6205 #endif
6207 /* Return 0 if the attributes for two types are incompatible, 1 if they
6208 are compatible, and 2 if they are nearly compatible (which causes a
6209 warning to be generated). */
6210 static int
6211 arm_comp_type_attributes (const_tree type1, const_tree type2)
6213 int l1, l2, s1, s2;
6215 /* Check for mismatch of non-default calling convention. */
6216 if (TREE_CODE (type1) != FUNCTION_TYPE)
6217 return 1;
6219 /* Check for mismatched call attributes. */
6220 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6221 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6222 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6223 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6225 /* Only bother to check if an attribute is defined. */
6226 if (l1 | l2 | s1 | s2)
6228 /* If one type has an attribute, the other must have the same attribute. */
6229 if ((l1 != l2) || (s1 != s2))
6230 return 0;
6232 /* Disallow mixed attributes. */
6233 if ((l1 & s2) || (l2 & s1))
6234 return 0;
6237 /* Check for mismatched ISR attribute. */
6238 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6239 if (! l1)
6240 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6241 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6242 if (! l2)
6243 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6244 if (l1 != l2)
6245 return 0;
6247 return 1;
6250 /* Assigns default attributes to newly defined type. This is used to
6251 set short_call/long_call attributes for function types of
6252 functions defined inside corresponding #pragma scopes. */
6253 static void
6254 arm_set_default_type_attributes (tree type)
6256 /* Add __attribute__ ((long_call)) to all functions, when
6257 inside #pragma long_calls or __attribute__ ((short_call)),
6258 when inside #pragma no_long_calls. */
6259 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6261 tree type_attr_list, attr_name;
6262 type_attr_list = TYPE_ATTRIBUTES (type);
6264 if (arm_pragma_long_calls == LONG)
6265 attr_name = get_identifier ("long_call");
6266 else if (arm_pragma_long_calls == SHORT)
6267 attr_name = get_identifier ("short_call");
6268 else
6269 return;
6271 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6272 TYPE_ATTRIBUTES (type) = type_attr_list;
6276 /* Return true if DECL is known to be linked into section SECTION. */
6278 static bool
6279 arm_function_in_section_p (tree decl, section *section)
6281 /* We can only be certain about functions defined in the same
6282 compilation unit. */
6283 if (!TREE_STATIC (decl))
6284 return false;
6286 /* Make sure that SYMBOL always binds to the definition in this
6287 compilation unit. */
6288 if (!targetm.binds_local_p (decl))
6289 return false;
6291 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6292 if (!DECL_SECTION_NAME (decl))
6294 /* Make sure that we will not create a unique section for DECL. */
6295 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6296 return false;
6299 return function_section (decl) == section;
6302 /* Return nonzero if a 32-bit "long_call" should be generated for
6303 a call from the current function to DECL. We generate a long_call
6304 if the function:
6306 a. has an __attribute__ ((long_call))
6307 or b. is within the scope of a #pragma long_calls
6308 or c. the -mlong-calls command line switch has been specified
6310 However we do not generate a long call if the function:
6312 d. has an __attribute__ ((short_call))
6313 or e. is inside the scope of a #pragma no_long_calls
6314 or f. is defined in the same section as the current function. */
6316 bool
6317 arm_is_long_call_p (tree decl)
6319 tree attrs;
6321 if (!decl)
6322 return TARGET_LONG_CALLS;
6324 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6325 if (lookup_attribute ("short_call", attrs))
6326 return false;
6328 /* For "f", be conservative, and only cater for cases in which the
6329 whole of the current function is placed in the same section. */
6330 if (!flag_reorder_blocks_and_partition
6331 && TREE_CODE (decl) == FUNCTION_DECL
6332 && arm_function_in_section_p (decl, current_function_section ()))
6333 return false;
6335 if (lookup_attribute ("long_call", attrs))
6336 return true;
6338 return TARGET_LONG_CALLS;
6341 /* Return nonzero if it is ok to make a tail-call to DECL. */
6342 static bool
6343 arm_function_ok_for_sibcall (tree decl, tree exp)
6345 unsigned long func_type;
6347 if (cfun->machine->sibcall_blocked)
6348 return false;
6350 /* Never tailcall something if we are generating code for Thumb-1. */
6351 if (TARGET_THUMB1)
6352 return false;
6354 /* The PIC register is live on entry to VxWorks PLT entries, so we
6355 must make the call before restoring the PIC register. */
6356 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6357 return false;
6359 /* If we are interworking and the function is not declared static
6360 then we can't tail-call it unless we know that it exists in this
6361 compilation unit (since it might be a Thumb routine). */
6362 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6363 && !TREE_ASM_WRITTEN (decl))
6364 return false;
6366 func_type = arm_current_func_type ();
6367 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6368 if (IS_INTERRUPT (func_type))
6369 return false;
6371 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6373 /* Check that the return value locations are the same. For
6374 example that we aren't returning a value from the sibling in
6375 a VFP register but then need to transfer it to a core
6376 register. */
6377 rtx a, b;
6379 a = arm_function_value (TREE_TYPE (exp), decl, false);
6380 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6381 cfun->decl, false);
6382 if (!rtx_equal_p (a, b))
6383 return false;
6386 /* Never tailcall if function may be called with a misaligned SP. */
6387 if (IS_STACKALIGN (func_type))
6388 return false;
6390 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6391 references should become a NOP. Don't convert such calls into
6392 sibling calls. */
6393 if (TARGET_AAPCS_BASED
6394 && arm_abi == ARM_ABI_AAPCS
6395 && decl
6396 && DECL_WEAK (decl))
6397 return false;
6399 /* Everything else is ok. */
6400 return true;
6404 /* Addressing mode support functions. */
6406 /* Return nonzero if X is a legitimate immediate operand when compiling
6407 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6408 int
6409 legitimate_pic_operand_p (rtx x)
6411 if (GET_CODE (x) == SYMBOL_REF
6412 || (GET_CODE (x) == CONST
6413 && GET_CODE (XEXP (x, 0)) == PLUS
6414 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6415 return 0;
6417 return 1;
6420 /* Record that the current function needs a PIC register. Initialize
6421 cfun->machine->pic_reg if we have not already done so. */
6423 static void
6424 require_pic_register (void)
6426 /* A lot of the logic here is made obscure by the fact that this
6427 routine gets called as part of the rtx cost estimation process.
6428 We don't want those calls to affect any assumptions about the real
6429 function; and further, we can't call entry_of_function() until we
6430 start the real expansion process. */
6431 if (!crtl->uses_pic_offset_table)
6433 gcc_assert (can_create_pseudo_p ());
6434 if (arm_pic_register != INVALID_REGNUM
6435 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6437 if (!cfun->machine->pic_reg)
6438 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6440 /* Play games to avoid marking the function as needing pic
6441 if we are being called as part of the cost-estimation
6442 process. */
6443 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6444 crtl->uses_pic_offset_table = 1;
6446 else
6448 rtx_insn *seq, *insn;
6450 if (!cfun->machine->pic_reg)
6451 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6453 /* Play games to avoid marking the function as needing pic
6454 if we are being called as part of the cost-estimation
6455 process. */
6456 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6458 crtl->uses_pic_offset_table = 1;
6459 start_sequence ();
6461 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6462 && arm_pic_register > LAST_LO_REGNUM)
6463 emit_move_insn (cfun->machine->pic_reg,
6464 gen_rtx_REG (Pmode, arm_pic_register));
6465 else
6466 arm_load_pic_register (0UL);
6468 seq = get_insns ();
6469 end_sequence ();
6471 for (insn = seq; insn; insn = NEXT_INSN (insn))
6472 if (INSN_P (insn))
6473 INSN_LOCATION (insn) = prologue_location;
6475 /* We can be called during expansion of PHI nodes, where
6476 we can't yet emit instructions directly in the final
6477 insn stream. Queue the insns on the entry edge, they will
6478 be committed after everything else is expanded. */
6479 insert_insn_on_edge (seq,
6480 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6486 rtx
6487 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6489 if (GET_CODE (orig) == SYMBOL_REF
6490 || GET_CODE (orig) == LABEL_REF)
6492 rtx insn;
6494 if (reg == 0)
6496 gcc_assert (can_create_pseudo_p ());
6497 reg = gen_reg_rtx (Pmode);
6500 /* VxWorks does not impose a fixed gap between segments; the run-time
6501 gap can be different from the object-file gap. We therefore can't
6502 use GOTOFF unless we are absolutely sure that the symbol is in the
6503 same segment as the GOT. Unfortunately, the flexibility of linker
6504 scripts means that we can't be sure of that in general, so assume
6505 that GOTOFF is never valid on VxWorks. */
6506 if ((GET_CODE (orig) == LABEL_REF
6507 || (GET_CODE (orig) == SYMBOL_REF &&
6508 SYMBOL_REF_LOCAL_P (orig)))
6509 && NEED_GOT_RELOC
6510 && arm_pic_data_is_text_relative)
6511 insn = arm_pic_static_addr (orig, reg);
6512 else
6514 rtx pat;
6515 rtx mem;
6517 /* If this function doesn't have a pic register, create one now. */
6518 require_pic_register ();
6520 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6522 /* Make the MEM as close to a constant as possible. */
6523 mem = SET_SRC (pat);
6524 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6525 MEM_READONLY_P (mem) = 1;
6526 MEM_NOTRAP_P (mem) = 1;
6528 insn = emit_insn (pat);
6531 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6532 by loop. */
6533 set_unique_reg_note (insn, REG_EQUAL, orig);
6535 return reg;
6537 else if (GET_CODE (orig) == CONST)
6539 rtx base, offset;
6541 if (GET_CODE (XEXP (orig, 0)) == PLUS
6542 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6543 return orig;
6545 /* Handle the case where we have: const (UNSPEC_TLS). */
6546 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6547 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6548 return orig;
6550 /* Handle the case where we have:
6551 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6552 CONST_INT. */
6553 if (GET_CODE (XEXP (orig, 0)) == PLUS
6554 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6555 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6557 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6558 return orig;
6561 if (reg == 0)
6563 gcc_assert (can_create_pseudo_p ());
6564 reg = gen_reg_rtx (Pmode);
6567 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6569 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6570 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6571 base == reg ? 0 : reg);
6573 if (CONST_INT_P (offset))
6575 /* The base register doesn't really matter, we only want to
6576 test the index for the appropriate mode. */
6577 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6579 gcc_assert (can_create_pseudo_p ());
6580 offset = force_reg (Pmode, offset);
6583 if (CONST_INT_P (offset))
6584 return plus_constant (Pmode, base, INTVAL (offset));
6587 if (GET_MODE_SIZE (mode) > 4
6588 && (GET_MODE_CLASS (mode) == MODE_INT
6589 || TARGET_SOFT_FLOAT))
6591 emit_insn (gen_addsi3 (reg, base, offset));
6592 return reg;
6595 return gen_rtx_PLUS (Pmode, base, offset);
6598 return orig;
6602 /* Find a spare register to use during the prolog of a function. */
6604 static int
6605 thumb_find_work_register (unsigned long pushed_regs_mask)
6607 int reg;
6609 /* Check the argument registers first as these are call-used. The
6610 register allocation order means that sometimes r3 might be used
6611 but earlier argument registers might not, so check them all. */
6612 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6613 if (!df_regs_ever_live_p (reg))
6614 return reg;
6616 /* Before going on to check the call-saved registers we can try a couple
6617 more ways of deducing that r3 is available. The first is when we are
6618 pushing anonymous arguments onto the stack and we have less than 4
6619 registers worth of fixed arguments(*). In this case r3 will be part of
6620 the variable argument list and so we can be sure that it will be
6621 pushed right at the start of the function. Hence it will be available
6622 for the rest of the prologue.
6623 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6624 if (cfun->machine->uses_anonymous_args
6625 && crtl->args.pretend_args_size > 0)
6626 return LAST_ARG_REGNUM;
6628 /* The other case is when we have fixed arguments but less than 4 registers
6629 worth. In this case r3 might be used in the body of the function, but
6630 it is not being used to convey an argument into the function. In theory
6631 we could just check crtl->args.size to see how many bytes are
6632 being passed in argument registers, but it seems that it is unreliable.
6633 Sometimes it will have the value 0 when in fact arguments are being
6634 passed. (See testcase execute/20021111-1.c for an example). So we also
6635 check the args_info.nregs field as well. The problem with this field is
6636 that it makes no allowances for arguments that are passed to the
6637 function but which are not used. Hence we could miss an opportunity
6638 when a function has an unused argument in r3. But it is better to be
6639 safe than to be sorry. */
6640 if (! cfun->machine->uses_anonymous_args
6641 && crtl->args.size >= 0
6642 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6643 && (TARGET_AAPCS_BASED
6644 ? crtl->args.info.aapcs_ncrn < 4
6645 : crtl->args.info.nregs < 4))
6646 return LAST_ARG_REGNUM;
6648 /* Otherwise look for a call-saved register that is going to be pushed. */
6649 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6650 if (pushed_regs_mask & (1 << reg))
6651 return reg;
6653 if (TARGET_THUMB2)
6655 /* Thumb-2 can use high regs. */
6656 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6657 if (pushed_regs_mask & (1 << reg))
6658 return reg;
6660 /* Something went wrong - thumb_compute_save_reg_mask()
6661 should have arranged for a suitable register to be pushed. */
6662 gcc_unreachable ();
6665 static GTY(()) int pic_labelno;
6667 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6668 low register. */
6670 void
6671 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6673 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6675 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6676 return;
6678 gcc_assert (flag_pic);
6680 pic_reg = cfun->machine->pic_reg;
6681 if (TARGET_VXWORKS_RTP)
6683 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6684 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6685 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6687 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6689 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6690 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6692 else
6694 /* We use an UNSPEC rather than a LABEL_REF because this label
6695 never appears in the code stream. */
6697 labelno = GEN_INT (pic_labelno++);
6698 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6699 l1 = gen_rtx_CONST (VOIDmode, l1);
6701 /* On the ARM the PC register contains 'dot + 8' at the time of the
6702 addition, on the Thumb it is 'dot + 4'. */
6703 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6704 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6705 UNSPEC_GOTSYM_OFF);
6706 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6708 if (TARGET_32BIT)
6710 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6712 else /* TARGET_THUMB1 */
6714 if (arm_pic_register != INVALID_REGNUM
6715 && REGNO (pic_reg) > LAST_LO_REGNUM)
6717 /* We will have pushed the pic register, so we should always be
6718 able to find a work register. */
6719 pic_tmp = gen_rtx_REG (SImode,
6720 thumb_find_work_register (saved_regs));
6721 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6722 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6723 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6725 else if (arm_pic_register != INVALID_REGNUM
6726 && arm_pic_register > LAST_LO_REGNUM
6727 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6729 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6730 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6731 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6733 else
6734 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6738 /* Need to emit this whether or not we obey regdecls,
6739 since setjmp/longjmp can cause life info to screw up. */
6740 emit_use (pic_reg);
6743 /* Generate code to load the address of a static var when flag_pic is set. */
6744 static rtx
6745 arm_pic_static_addr (rtx orig, rtx reg)
6747 rtx l1, labelno, offset_rtx, insn;
6749 gcc_assert (flag_pic);
6751 /* We use an UNSPEC rather than a LABEL_REF because this label
6752 never appears in the code stream. */
6753 labelno = GEN_INT (pic_labelno++);
6754 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6755 l1 = gen_rtx_CONST (VOIDmode, l1);
6757 /* On the ARM the PC register contains 'dot + 8' at the time of the
6758 addition, on the Thumb it is 'dot + 4'. */
6759 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6760 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6761 UNSPEC_SYMBOL_OFFSET);
6762 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6764 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6765 return insn;
6768 /* Return nonzero if X is valid as an ARM state addressing register. */
6769 static int
6770 arm_address_register_rtx_p (rtx x, int strict_p)
6772 int regno;
6774 if (!REG_P (x))
6775 return 0;
6777 regno = REGNO (x);
6779 if (strict_p)
6780 return ARM_REGNO_OK_FOR_BASE_P (regno);
6782 return (regno <= LAST_ARM_REGNUM
6783 || regno >= FIRST_PSEUDO_REGISTER
6784 || regno == FRAME_POINTER_REGNUM
6785 || regno == ARG_POINTER_REGNUM);
6788 /* Return TRUE if this rtx is the difference of a symbol and a label,
6789 and will reduce to a PC-relative relocation in the object file.
6790 Expressions like this can be left alone when generating PIC, rather
6791 than forced through the GOT. */
6792 static int
6793 pcrel_constant_p (rtx x)
6795 if (GET_CODE (x) == MINUS)
6796 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6798 return FALSE;
6801 /* Return true if X will surely end up in an index register after next
6802 splitting pass. */
6803 static bool
6804 will_be_in_index_register (const_rtx x)
6806 /* arm.md: calculate_pic_address will split this into a register. */
6807 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6810 /* Return nonzero if X is a valid ARM state address operand. */
6811 int
6812 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6813 int strict_p)
6815 bool use_ldrd;
6816 enum rtx_code code = GET_CODE (x);
6818 if (arm_address_register_rtx_p (x, strict_p))
6819 return 1;
6821 use_ldrd = (TARGET_LDRD
6822 && (mode == DImode
6823 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6825 if (code == POST_INC || code == PRE_DEC
6826 || ((code == PRE_INC || code == POST_DEC)
6827 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6828 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6830 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6831 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6832 && GET_CODE (XEXP (x, 1)) == PLUS
6833 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6835 rtx addend = XEXP (XEXP (x, 1), 1);
6837 /* Don't allow ldrd post increment by register because it's hard
6838 to fixup invalid register choices. */
6839 if (use_ldrd
6840 && GET_CODE (x) == POST_MODIFY
6841 && REG_P (addend))
6842 return 0;
6844 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6845 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6848 /* After reload constants split into minipools will have addresses
6849 from a LABEL_REF. */
6850 else if (reload_completed
6851 && (code == LABEL_REF
6852 || (code == CONST
6853 && GET_CODE (XEXP (x, 0)) == PLUS
6854 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6855 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6856 return 1;
6858 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6859 return 0;
6861 else if (code == PLUS)
6863 rtx xop0 = XEXP (x, 0);
6864 rtx xop1 = XEXP (x, 1);
6866 return ((arm_address_register_rtx_p (xop0, strict_p)
6867 && ((CONST_INT_P (xop1)
6868 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6869 || (!strict_p && will_be_in_index_register (xop1))))
6870 || (arm_address_register_rtx_p (xop1, strict_p)
6871 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6874 #if 0
6875 /* Reload currently can't handle MINUS, so disable this for now */
6876 else if (GET_CODE (x) == MINUS)
6878 rtx xop0 = XEXP (x, 0);
6879 rtx xop1 = XEXP (x, 1);
6881 return (arm_address_register_rtx_p (xop0, strict_p)
6882 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6884 #endif
6886 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6887 && code == SYMBOL_REF
6888 && CONSTANT_POOL_ADDRESS_P (x)
6889 && ! (flag_pic
6890 && symbol_mentioned_p (get_pool_constant (x))
6891 && ! pcrel_constant_p (get_pool_constant (x))))
6892 return 1;
6894 return 0;
6897 /* Return nonzero if X is a valid Thumb-2 address operand. */
6898 static int
6899 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6901 bool use_ldrd;
6902 enum rtx_code code = GET_CODE (x);
6904 if (arm_address_register_rtx_p (x, strict_p))
6905 return 1;
6907 use_ldrd = (TARGET_LDRD
6908 && (mode == DImode
6909 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6911 if (code == POST_INC || code == PRE_DEC
6912 || ((code == PRE_INC || code == POST_DEC)
6913 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6914 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6916 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6917 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6918 && GET_CODE (XEXP (x, 1)) == PLUS
6919 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6921 /* Thumb-2 only has autoincrement by constant. */
6922 rtx addend = XEXP (XEXP (x, 1), 1);
6923 HOST_WIDE_INT offset;
6925 if (!CONST_INT_P (addend))
6926 return 0;
6928 offset = INTVAL(addend);
6929 if (GET_MODE_SIZE (mode) <= 4)
6930 return (offset > -256 && offset < 256);
6932 return (use_ldrd && offset > -1024 && offset < 1024
6933 && (offset & 3) == 0);
6936 /* After reload constants split into minipools will have addresses
6937 from a LABEL_REF. */
6938 else if (reload_completed
6939 && (code == LABEL_REF
6940 || (code == CONST
6941 && GET_CODE (XEXP (x, 0)) == PLUS
6942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6943 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6944 return 1;
6946 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6947 return 0;
6949 else if (code == PLUS)
6951 rtx xop0 = XEXP (x, 0);
6952 rtx xop1 = XEXP (x, 1);
6954 return ((arm_address_register_rtx_p (xop0, strict_p)
6955 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6956 || (!strict_p && will_be_in_index_register (xop1))))
6957 || (arm_address_register_rtx_p (xop1, strict_p)
6958 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6961 /* Normally we can assign constant values to target registers without
6962 the help of constant pool. But there are cases we have to use constant
6963 pool like:
6964 1) assign a label to a register.
6965 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
6967 Constant pool access in format:
6968 (set (reg r0) (mem (symbol_ref (".LC0"))))
6969 will cause the use of the literal pool (later in function arm_reorg).
6970 So here we mark such a format as invalid; the compiler will then
6971 adjust it into:
6972 (set (reg r0) (symbol_ref (".LC0")))
6973 (set (reg r0) (mem (reg r0))).
6974 No extra register is required, and (mem (reg r0)) won't cause the use
6975 of literal pools. */
6976 else if (arm_disable_literal_pool && code == SYMBOL_REF
6977 && CONSTANT_POOL_ADDRESS_P (x))
6978 return 0;
6980 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6981 && code == SYMBOL_REF
6982 && CONSTANT_POOL_ADDRESS_P (x)
6983 && ! (flag_pic
6984 && symbol_mentioned_p (get_pool_constant (x))
6985 && ! pcrel_constant_p (get_pool_constant (x))))
6986 return 1;
6988 return 0;
6991 /* Return nonzero if INDEX is valid for an address index operand in
6992 ARM state. */
6993 static int
6994 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
6995 int strict_p)
6997 HOST_WIDE_INT range;
6998 enum rtx_code code = GET_CODE (index);
7000 /* Standard coprocessor addressing modes. */
7001 if (TARGET_HARD_FLOAT
7002 && TARGET_VFP
7003 && (mode == SFmode || mode == DFmode))
7004 return (code == CONST_INT && INTVAL (index) < 1024
7005 && INTVAL (index) > -1024
7006 && (INTVAL (index) & 3) == 0);
7008 /* For quad modes, we restrict the constant offset to be slightly less
7009 than what the instruction format permits. We do this because for
7010 quad mode moves, we will actually decompose them into two separate
7011 double-mode reads or writes. INDEX must therefore be a valid
7012 (double-mode) offset and so should INDEX+8. */
7013 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7014 return (code == CONST_INT
7015 && INTVAL (index) < 1016
7016 && INTVAL (index) > -1024
7017 && (INTVAL (index) & 3) == 0);
7019 /* We have no such constraint on double mode offsets, so we permit the
7020 full range of the instruction format. */
7021 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7022 return (code == CONST_INT
7023 && INTVAL (index) < 1024
7024 && INTVAL (index) > -1024
7025 && (INTVAL (index) & 3) == 0);
7027 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7028 return (code == CONST_INT
7029 && INTVAL (index) < 1024
7030 && INTVAL (index) > -1024
7031 && (INTVAL (index) & 3) == 0);
7033 if (arm_address_register_rtx_p (index, strict_p)
7034 && (GET_MODE_SIZE (mode) <= 4))
7035 return 1;
7037 if (mode == DImode || mode == DFmode)
7039 if (code == CONST_INT)
7041 HOST_WIDE_INT val = INTVAL (index);
7043 if (TARGET_LDRD)
7044 return val > -256 && val < 256;
7045 else
7046 return val > -4096 && val < 4092;
7049 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7052 if (GET_MODE_SIZE (mode) <= 4
7053 && ! (arm_arch4
7054 && (mode == HImode
7055 || mode == HFmode
7056 || (mode == QImode && outer == SIGN_EXTEND))))
7058 if (code == MULT)
7060 rtx xiop0 = XEXP (index, 0);
7061 rtx xiop1 = XEXP (index, 1);
7063 return ((arm_address_register_rtx_p (xiop0, strict_p)
7064 && power_of_two_operand (xiop1, SImode))
7065 || (arm_address_register_rtx_p (xiop1, strict_p)
7066 && power_of_two_operand (xiop0, SImode)));
7068 else if (code == LSHIFTRT || code == ASHIFTRT
7069 || code == ASHIFT || code == ROTATERT)
7071 rtx op = XEXP (index, 1);
7073 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7074 && CONST_INT_P (op)
7075 && INTVAL (op) > 0
7076 && INTVAL (op) <= 31);
7080 /* For ARM v4 we may be doing a sign-extend operation during the
7081 load. */
7082 if (arm_arch4)
7084 if (mode == HImode
7085 || mode == HFmode
7086 || (outer == SIGN_EXTEND && mode == QImode))
7087 range = 256;
7088 else
7089 range = 4096;
7091 else
7092 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7094 return (code == CONST_INT
7095 && INTVAL (index) < range
7096 && INTVAL (index) > -range);
7099 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7100 index operand. i.e. 1, 2, 4 or 8. */
7101 static bool
7102 thumb2_index_mul_operand (rtx op)
7104 HOST_WIDE_INT val;
7106 if (!CONST_INT_P (op))
7107 return false;
7109 val = INTVAL(op);
7110 return (val == 1 || val == 2 || val == 4 || val == 8);
7113 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7114 static int
7115 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7117 enum rtx_code code = GET_CODE (index);
7119 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7120 /* Standard coprocessor addressing modes. */
7121 if (TARGET_HARD_FLOAT
7122 && TARGET_VFP
7123 && (mode == SFmode || mode == DFmode))
7124 return (code == CONST_INT && INTVAL (index) < 1024
7125 /* Thumb-2 allows only > -256 index range for its core register
7126 load/stores. Since we allow SF/DF in core registers, we have
7127 to use the intersection between -256~4096 (core) and -1024~1024
7128 (coprocessor). */
7129 && INTVAL (index) > -256
7130 && (INTVAL (index) & 3) == 0);
7132 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7134 /* For DImode assume values will usually live in core regs
7135 and only allow LDRD addressing modes. */
7136 if (!TARGET_LDRD || mode != DImode)
7137 return (code == CONST_INT
7138 && INTVAL (index) < 1024
7139 && INTVAL (index) > -1024
7140 && (INTVAL (index) & 3) == 0);
7143 /* For quad modes, we restrict the constant offset to be slightly less
7144 than what the instruction format permits. We do this because for
7145 quad mode moves, we will actually decompose them into two separate
7146 double-mode reads or writes. INDEX must therefore be a valid
7147 (double-mode) offset and so should INDEX+8. */
7148 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7149 return (code == CONST_INT
7150 && INTVAL (index) < 1016
7151 && INTVAL (index) > -1024
7152 && (INTVAL (index) & 3) == 0);
7154 /* We have no such constraint on double mode offsets, so we permit the
7155 full range of the instruction format. */
7156 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7157 return (code == CONST_INT
7158 && INTVAL (index) < 1024
7159 && INTVAL (index) > -1024
7160 && (INTVAL (index) & 3) == 0);
7162 if (arm_address_register_rtx_p (index, strict_p)
7163 && (GET_MODE_SIZE (mode) <= 4))
7164 return 1;
7166 if (mode == DImode || mode == DFmode)
7168 if (code == CONST_INT)
7170 HOST_WIDE_INT val = INTVAL (index);
7171 /* ??? Can we assume ldrd for thumb2? */
7172 /* Thumb-2 ldrd only has reg+const addressing modes. */
7173 /* ldrd supports offsets of +-1020.
7174 However the ldr fallback does not. */
7175 return val > -256 && val < 256 && (val & 3) == 0;
7177 else
7178 return 0;
7181 if (code == MULT)
7183 rtx xiop0 = XEXP (index, 0);
7184 rtx xiop1 = XEXP (index, 1);
7186 return ((arm_address_register_rtx_p (xiop0, strict_p)
7187 && thumb2_index_mul_operand (xiop1))
7188 || (arm_address_register_rtx_p (xiop1, strict_p)
7189 && thumb2_index_mul_operand (xiop0)));
7191 else if (code == ASHIFT)
7193 rtx op = XEXP (index, 1);
7195 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7196 && CONST_INT_P (op)
7197 && INTVAL (op) > 0
7198 && INTVAL (op) <= 3);
7201 return (code == CONST_INT
7202 && INTVAL (index) < 4096
7203 && INTVAL (index) > -256);
7206 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7207 static int
7208 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7210 int regno;
7212 if (!REG_P (x))
7213 return 0;
7215 regno = REGNO (x);
7217 if (strict_p)
7218 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7220 return (regno <= LAST_LO_REGNUM
7221 || regno > LAST_VIRTUAL_REGISTER
7222 || regno == FRAME_POINTER_REGNUM
7223 || (GET_MODE_SIZE (mode) >= 4
7224 && (regno == STACK_POINTER_REGNUM
7225 || regno >= FIRST_PSEUDO_REGISTER
7226 || x == hard_frame_pointer_rtx
7227 || x == arg_pointer_rtx)));
7230 /* Return nonzero if x is a legitimate index register. This is the case
7231 for any base register that can access a QImode object. */
7232 inline static int
7233 thumb1_index_register_rtx_p (rtx x, int strict_p)
7235 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7238 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7240 The AP may be eliminated to either the SP or the FP, so we use the
7241 least common denominator, e.g. SImode, and offsets from 0 to 64.
7243 ??? Verify whether the above is the right approach.
7245 ??? Also, the FP may be eliminated to the SP, so perhaps that
7246 needs special handling also.
7248 ??? Look at how the mips16 port solves this problem. It probably uses
7249 better ways to solve some of these problems.
7251 Although it is not incorrect, we don't accept QImode and HImode
7252 addresses based on the frame pointer or arg pointer until the
7253 reload pass starts. This is so that eliminating such addresses
7254 into stack based ones won't produce impossible code. */
7255 int
7256 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7258 /* ??? Not clear if this is right. Experiment. */
7259 if (GET_MODE_SIZE (mode) < 4
7260 && !(reload_in_progress || reload_completed)
7261 && (reg_mentioned_p (frame_pointer_rtx, x)
7262 || reg_mentioned_p (arg_pointer_rtx, x)
7263 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7264 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7265 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7266 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7267 return 0;
7269 /* Accept any base register. SP only in SImode or larger. */
7270 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7271 return 1;
7273 /* This is PC relative data before arm_reorg runs. */
7274 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7275 && GET_CODE (x) == SYMBOL_REF
7276 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7277 return 1;
7279 /* This is PC relative data after arm_reorg runs. */
7280 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7281 && reload_completed
7282 && (GET_CODE (x) == LABEL_REF
7283 || (GET_CODE (x) == CONST
7284 && GET_CODE (XEXP (x, 0)) == PLUS
7285 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7286 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7287 return 1;
7289 /* Post-inc indexing only supported for SImode and larger. */
7290 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7291 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7292 return 1;
7294 else if (GET_CODE (x) == PLUS)
7296 /* REG+REG address can be any two index registers. */
7297 /* We disallow FRAME+REG addressing since we know that FRAME
7298 will be replaced with STACK, and SP relative addressing only
7299 permits SP+OFFSET. */
7300 if (GET_MODE_SIZE (mode) <= 4
7301 && XEXP (x, 0) != frame_pointer_rtx
7302 && XEXP (x, 1) != frame_pointer_rtx
7303 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7304 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7305 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7306 return 1;
7308 /* REG+const has 5-7 bit offset for non-SP registers. */
7309 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7310 || XEXP (x, 0) == arg_pointer_rtx)
7311 && CONST_INT_P (XEXP (x, 1))
7312 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7313 return 1;
7315 /* REG+const has 10-bit offset for SP, but only SImode and
7316 larger are supported. */
7317 /* ??? Should probably check for DI/DFmode overflow here
7318 just like GO_IF_LEGITIMATE_OFFSET does. */
7319 else if (REG_P (XEXP (x, 0))
7320 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7321 && GET_MODE_SIZE (mode) >= 4
7322 && CONST_INT_P (XEXP (x, 1))
7323 && INTVAL (XEXP (x, 1)) >= 0
7324 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7325 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7326 return 1;
7328 else if (REG_P (XEXP (x, 0))
7329 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7330 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7331 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7332 && REGNO (XEXP (x, 0))
7333 <= LAST_VIRTUAL_POINTER_REGISTER))
7334 && GET_MODE_SIZE (mode) >= 4
7335 && CONST_INT_P (XEXP (x, 1))
7336 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7337 return 1;
7340 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7341 && GET_MODE_SIZE (mode) == 4
7342 && GET_CODE (x) == SYMBOL_REF
7343 && CONSTANT_POOL_ADDRESS_P (x)
7344 && ! (flag_pic
7345 && symbol_mentioned_p (get_pool_constant (x))
7346 && ! pcrel_constant_p (get_pool_constant (x))))
7347 return 1;
7349 return 0;
7352 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7353 instruction of mode MODE. */
7354 int
7355 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7357 switch (GET_MODE_SIZE (mode))
7359 case 1:
7360 return val >= 0 && val < 32;
7362 case 2:
7363 return val >= 0 && val < 64 && (val & 1) == 0;
7365 default:
7366 return (val >= 0
7367 && (val + GET_MODE_SIZE (mode)) <= 128
7368 && (val & 3) == 0);
7372 bool
7373 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7375 if (TARGET_ARM)
7376 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7377 else if (TARGET_THUMB2)
7378 return thumb2_legitimate_address_p (mode, x, strict_p);
7379 else /* if (TARGET_THUMB1) */
7380 return thumb1_legitimate_address_p (mode, x, strict_p);
7383 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7385 Given an rtx X being reloaded into a reg required to be
7386 in class CLASS, return the class of reg to actually use.
7387 In general this is just CLASS, but for the Thumb core registers and
7388 immediate constants we prefer a LO_REGS class or a subset. */
7390 static reg_class_t
7391 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7393 if (TARGET_32BIT)
7394 return rclass;
7395 else
7397 if (rclass == GENERAL_REGS)
7398 return LO_REGS;
7399 else
7400 return rclass;
7404 /* Build the SYMBOL_REF for __tls_get_addr. */
7406 static GTY(()) rtx tls_get_addr_libfunc;
7408 static rtx
7409 get_tls_get_addr (void)
7411 if (!tls_get_addr_libfunc)
7412 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7413 return tls_get_addr_libfunc;
7417 arm_load_tp (rtx target)
7419 if (!target)
7420 target = gen_reg_rtx (SImode);
7422 if (TARGET_HARD_TP)
7424 /* Can return in any reg. */
7425 emit_insn (gen_load_tp_hard (target));
7427 else
7429 /* Always returned in r0. Immediately copy the result into a pseudo;
7430 otherwise other uses of r0 (e.g. setting up function arguments) may
7431 clobber the value. */
7433 rtx tmp;
7435 emit_insn (gen_load_tp_soft ());
7437 tmp = gen_rtx_REG (SImode, 0);
7438 emit_move_insn (target, tmp);
7440 return target;
7443 static rtx
7444 load_tls_operand (rtx x, rtx reg)
7446 rtx tmp;
7448 if (reg == NULL_RTX)
7449 reg = gen_reg_rtx (SImode);
7451 tmp = gen_rtx_CONST (SImode, x);
7453 emit_move_insn (reg, tmp);
7455 return reg;
7458 static rtx
7459 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7461 rtx insns, label, labelno, sum;
7463 gcc_assert (reloc != TLS_DESCSEQ);
7464 start_sequence ();
7466 labelno = GEN_INT (pic_labelno++);
7467 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7468 label = gen_rtx_CONST (VOIDmode, label);
7470 sum = gen_rtx_UNSPEC (Pmode,
7471 gen_rtvec (4, x, GEN_INT (reloc), label,
7472 GEN_INT (TARGET_ARM ? 8 : 4)),
7473 UNSPEC_TLS);
7474 reg = load_tls_operand (sum, reg);
7476 if (TARGET_ARM)
7477 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7478 else
7479 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7481 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7482 LCT_PURE, /* LCT_CONST? */
7483 Pmode, 1, reg, Pmode);
7485 insns = get_insns ();
7486 end_sequence ();
7488 return insns;
7491 static rtx
7492 arm_tls_descseq_addr (rtx x, rtx reg)
7494 rtx labelno = GEN_INT (pic_labelno++);
7495 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7496 rtx sum = gen_rtx_UNSPEC (Pmode,
7497 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7498 gen_rtx_CONST (VOIDmode, label),
7499 GEN_INT (!TARGET_ARM)),
7500 UNSPEC_TLS);
7501 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7503 emit_insn (gen_tlscall (x, labelno));
7504 if (!reg)
7505 reg = gen_reg_rtx (SImode);
7506 else
7507 gcc_assert (REGNO (reg) != 0);
7509 emit_move_insn (reg, reg0);
7511 return reg;
7515 legitimize_tls_address (rtx x, rtx reg)
7517 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7518 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7520 switch (model)
7522 case TLS_MODEL_GLOBAL_DYNAMIC:
7523 if (TARGET_GNU2_TLS)
7525 reg = arm_tls_descseq_addr (x, reg);
7527 tp = arm_load_tp (NULL_RTX);
7529 dest = gen_rtx_PLUS (Pmode, tp, reg);
7531 else
7533 /* Original scheme */
7534 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7535 dest = gen_reg_rtx (Pmode);
7536 emit_libcall_block (insns, dest, ret, x);
7538 return dest;
7540 case TLS_MODEL_LOCAL_DYNAMIC:
7541 if (TARGET_GNU2_TLS)
7543 reg = arm_tls_descseq_addr (x, reg);
7545 tp = arm_load_tp (NULL_RTX);
7547 dest = gen_rtx_PLUS (Pmode, tp, reg);
7549 else
7551 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7553 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7554 share the LDM result with other LD model accesses. */
7555 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7556 UNSPEC_TLS);
7557 dest = gen_reg_rtx (Pmode);
7558 emit_libcall_block (insns, dest, ret, eqv);
7560 /* Load the addend. */
7561 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7562 GEN_INT (TLS_LDO32)),
7563 UNSPEC_TLS);
7564 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7565 dest = gen_rtx_PLUS (Pmode, dest, addend);
7567 return dest;
7569 case TLS_MODEL_INITIAL_EXEC:
7570 labelno = GEN_INT (pic_labelno++);
7571 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7572 label = gen_rtx_CONST (VOIDmode, label);
7573 sum = gen_rtx_UNSPEC (Pmode,
7574 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7575 GEN_INT (TARGET_ARM ? 8 : 4)),
7576 UNSPEC_TLS);
7577 reg = load_tls_operand (sum, reg);
7579 if (TARGET_ARM)
7580 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7581 else if (TARGET_THUMB2)
7582 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7583 else
7585 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7586 emit_move_insn (reg, gen_const_mem (SImode, reg));
7589 tp = arm_load_tp (NULL_RTX);
7591 return gen_rtx_PLUS (Pmode, tp, reg);
7593 case TLS_MODEL_LOCAL_EXEC:
7594 tp = arm_load_tp (NULL_RTX);
7596 reg = gen_rtx_UNSPEC (Pmode,
7597 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7598 UNSPEC_TLS);
7599 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7601 return gen_rtx_PLUS (Pmode, tp, reg);
7603 default:
7604 abort ();
7608 /* Try machine-dependent ways of modifying an illegitimate address
7609 to be legitimate. If we find one, return the new, valid address. */
7611 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7613 if (arm_tls_referenced_p (x))
7615 rtx addend = NULL;
7617 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7619 addend = XEXP (XEXP (x, 0), 1);
7620 x = XEXP (XEXP (x, 0), 0);
7623 if (GET_CODE (x) != SYMBOL_REF)
7624 return x;
7626 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7628 x = legitimize_tls_address (x, NULL_RTX);
7630 if (addend)
7632 x = gen_rtx_PLUS (SImode, x, addend);
7633 orig_x = x;
7635 else
7636 return x;
7639 if (!TARGET_ARM)
7641 /* TODO: legitimize_address for Thumb2. */
7642 if (TARGET_THUMB2)
7643 return x;
7644 return thumb_legitimize_address (x, orig_x, mode);
7647 if (GET_CODE (x) == PLUS)
7649 rtx xop0 = XEXP (x, 0);
7650 rtx xop1 = XEXP (x, 1);
7652 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7653 xop0 = force_reg (SImode, xop0);
7655 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7656 && !symbol_mentioned_p (xop1))
7657 xop1 = force_reg (SImode, xop1);
7659 if (ARM_BASE_REGISTER_RTX_P (xop0)
7660 && CONST_INT_P (xop1))
7662 HOST_WIDE_INT n, low_n;
7663 rtx base_reg, val;
7664 n = INTVAL (xop1);
7666 /* VFP addressing modes actually allow greater offsets, but for
7667 now we just stick with the lowest common denominator. */
7668 if (mode == DImode
7669 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7671 low_n = n & 0x0f;
7672 n &= ~0x0f;
7673 if (low_n > 4)
7675 n += 16;
7676 low_n -= 16;
7679 else
7681 low_n = ((mode) == TImode ? 0
7682 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7683 n -= low_n;
7686 base_reg = gen_reg_rtx (SImode);
7687 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7688 emit_move_insn (base_reg, val);
7689 x = plus_constant (Pmode, base_reg, low_n);
7691 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7692 x = gen_rtx_PLUS (SImode, xop0, xop1);
7695 /* XXX We don't allow MINUS any more -- see comment in
7696 arm_legitimate_address_outer_p (). */
7697 else if (GET_CODE (x) == MINUS)
7699 rtx xop0 = XEXP (x, 0);
7700 rtx xop1 = XEXP (x, 1);
7702 if (CONSTANT_P (xop0))
7703 xop0 = force_reg (SImode, xop0);
7705 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7706 xop1 = force_reg (SImode, xop1);
7708 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7709 x = gen_rtx_MINUS (SImode, xop0, xop1);
7712 /* Make sure to take full advantage of the pre-indexed addressing mode
7713 with absolute addresses, which often allows the base register to
7714 be factorized for multiple adjacent memory references, and might
7715 even allow the minipool to be avoided entirely. */
7716 else if (CONST_INT_P (x) && optimize > 0)
7718 unsigned int bits;
7719 HOST_WIDE_INT mask, base, index;
7720 rtx base_reg;
7722 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7723 use an 8-bit index. So let's use a 12-bit index for SImode only and
7724 hope that arm_gen_constant will enable ldrb to use more bits. */
7725 bits = (mode == SImode) ? 12 : 8;
7726 mask = (1 << bits) - 1;
7727 base = INTVAL (x) & ~mask;
7728 index = INTVAL (x) & mask;
7729 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7731 /* It'll most probably be more efficient to generate the base
7732 with more bits set and use a negative index instead. */
7733 base |= mask;
7734 index -= mask;
7736 base_reg = force_reg (SImode, GEN_INT (base));
7737 x = plus_constant (Pmode, base_reg, index);
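/* A purely illustrative example of the split above for an SImode access:
   for the constant address 0x1004 we get base = 0x1000 and index = 4,
   and 0x1000 is kept as the base since only one bit is set.  For
   0x0ffff004 the initial base 0x0ffff000 has 16 bits set, so we switch to
   base = 0x0fffffff (a single MVN of 0xf0000000) with index = -4091,
   which still sums to the original address.  */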
7740 if (flag_pic)
7742 /* We need to find and carefully transform any SYMBOL and LABEL
7743 references, so go back to the original address expression. */
7744 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7746 if (new_x != orig_x)
7747 x = new_x;
7750 return x;
7754 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7755 to be legitimate. If we find one, return the new, valid address. */
7757 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7759 if (GET_CODE (x) == PLUS
7760 && CONST_INT_P (XEXP (x, 1))
7761 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7762 || INTVAL (XEXP (x, 1)) < 0))
7764 rtx xop0 = XEXP (x, 0);
7765 rtx xop1 = XEXP (x, 1);
7766 HOST_WIDE_INT offset = INTVAL (xop1);
7768 /* Try to fold the offset into a biasing of the base register and
7769 then offsetting that. Don't do this when optimizing for space
7770 since it can cause too many CSEs. */
7771 if (optimize_size && offset >= 0
7772 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7774 HOST_WIDE_INT delta;
7776 if (offset >= 256)
7777 delta = offset - (256 - GET_MODE_SIZE (mode));
7778 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7779 delta = 31 * GET_MODE_SIZE (mode);
7780 else
7781 delta = offset & (~31 * GET_MODE_SIZE (mode));
7783 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7784 NULL_RTX);
7785 x = plus_constant (Pmode, xop0, delta);
7787 else if (offset < 0 && offset > -256)
7788 /* Small negative offsets are best done with a subtract before the
7789 dereference; forcing these into a register normally takes two
7790 instructions. */
7791 x = force_operand (x, NULL_RTX);
7792 else
7794 /* For the remaining cases, force the constant into a register. */
7795 xop1 = force_reg (SImode, xop1);
7796 x = gen_rtx_PLUS (SImode, xop0, xop1);
7799 else if (GET_CODE (x) == PLUS
7800 && s_register_operand (XEXP (x, 1), SImode)
7801 && !s_register_operand (XEXP (x, 0), SImode))
7803 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7805 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7808 if (flag_pic)
7810 /* We need to find and carefully transform any SYMBOL and LABEL
7811 references, so go back to the original address expression. */
7812 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7814 if (new_x != orig_x)
7815 x = new_x;
7818 return x;
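/* A hypothetical example of the rebiasing above for SImode at -Os:
   an offset of 300 gives delta = 300 - 252 = 48, so we compute
   base + 252 into a scratch register and address it with the remaining
   offset of 48, which thumb_legitimate_offset_p accepts.  */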
7821 bool
7822 arm_legitimize_reload_address (rtx *p,
7823 machine_mode mode,
7824 int opnum, int type,
7825 int ind_levels ATTRIBUTE_UNUSED)
7827 /* We must recognize output that we have already generated ourselves. */
7828 if (GET_CODE (*p) == PLUS
7829 && GET_CODE (XEXP (*p, 0)) == PLUS
7830 && REG_P (XEXP (XEXP (*p, 0), 0))
7831 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7832 && CONST_INT_P (XEXP (*p, 1)))
7834 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7835 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7836 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7837 return true;
7840 if (GET_CODE (*p) == PLUS
7841 && REG_P (XEXP (*p, 0))
7842 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7843 /* If the base register is equivalent to a constant, let the generic
7844 code handle it. Otherwise we will run into problems if a future
7845 reload pass decides to rematerialize the constant. */
7846 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7847 && CONST_INT_P (XEXP (*p, 1)))
7849 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7850 HOST_WIDE_INT low, high;
7852 /* Detect coprocessor load/stores. */
7853 bool coproc_p = ((TARGET_HARD_FLOAT
7854 && TARGET_VFP
7855 && (mode == SFmode || mode == DFmode))
7856 || (TARGET_REALLY_IWMMXT
7857 && VALID_IWMMXT_REG_MODE (mode))
7858 || (TARGET_NEON
7859 && (VALID_NEON_DREG_MODE (mode)
7860 || VALID_NEON_QREG_MODE (mode))));
7862 /* For some conditions, bail out when the low two bits of the offset are nonzero. */
7863 if ((val & 0x3) != 0
7864 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7865 && (coproc_p
7866 /* For DI, and DF under soft-float: */
7867 || ((mode == DImode || mode == DFmode)
7868 /* Without ldrd, we use stm/ldm, which does not
7869 fare well with unaligned bits. */
7870 && (! TARGET_LDRD
7871 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7872 || TARGET_THUMB2))))
7873 return false;
7875 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7876 of which the (reg+high) gets turned into a reload add insn,
7877 we try to decompose the index into high/low values that can often
7878 also lead to better reload CSE.
7879 For example:
7880 ldr r0, [r2, #4100] // Offset too large
7881 ldr r1, [r2, #4104] // Offset too large
7883 is best reloaded as:
7884 add t1, r2, #4096
7885 ldr r0, [t1, #4]
7886 add t2, r2, #4096
7887 ldr r1, [t2, #8]
7889 which post-reload CSE can simplify in most cases to eliminate the
7890 second add instruction:
7891 add t1, r2, #4096
7892 ldr r0, [t1, #4]
7893 ldr r1, [t1, #8]
7895 The idea here is that we want to split out the bits of the constant
7896 as a mask, rather than subtracting the maximum offset that the
7897 respective type of load/store instruction can handle.
7899 A negative low part is still useful even when the overall offset is
7900 positive; sometimes this leads to an immediate that can be
7901 constructed with fewer instructions.
7902 For example:
7903 ldr r0, [r2, #0x3FFFFC]
7905 This is best reloaded as:
7906 add t1, r2, #0x400000
7907 ldr r0, [t1, #-4]
7909 The trick for spotting this for a load insn with N bits of offset
7910 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7911 negative offset that is going to make bit N and all the bits below
7912 it become zero in the remainder part.
7914 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7915 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7916 used in most cases of ARM load/store instructions. */
7918 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7919 (((VAL) & ((1 << (N)) - 1)) \
7920 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7921 : 0)
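/* For illustration, evaluating the macro on the examples above:
   SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12) yields -4 (so the reloaded base
   becomes reg + 0x400000), SIGN_MAG_LOW_ADDR_BITS (4100, 8) yields 4
   (base becomes reg + 4096), and SIGN_MAG_LOW_ADDR_BITS (4100, 12)
   yields -4092 (base becomes reg + 8192); in each case HIGH + LOW adds
   back up to the original offset.  */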
7923 if (coproc_p)
7925 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7927 /* NEON quad-word load/stores are made of two double-word accesses,
7928 so the valid index range is reduced by 8. Treat as 9-bit range if
7929 we go over it. */
7930 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7931 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7933 else if (GET_MODE_SIZE (mode) == 8)
7935 if (TARGET_LDRD)
7936 low = (TARGET_THUMB2
7937 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7938 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7939 else
7940 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7941 to access doublewords. The supported load/store offsets are
7942 -8, -4, and 4, which we try to produce here. */
7943 low = ((val & 0xf) ^ 0x8) - 0x8;
7945 else if (GET_MODE_SIZE (mode) < 8)
7947 /* NEON element load/stores do not have an offset. */
7948 if (TARGET_NEON_FP16 && mode == HFmode)
7949 return false;
7951 if (TARGET_THUMB2)
7953 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7954 Try the wider 12-bit range first, and re-try if the result
7955 is out of range. */
7956 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7957 if (low < -255)
7958 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7960 else
7962 if (mode == HImode || mode == HFmode)
7964 if (arm_arch4)
7965 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7966 else
7968 /* The storehi/movhi_bytes fallbacks can use only
7969 [-4094,+4094] of the full ldrb/strb index range. */
7970 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7971 if (low == 4095 || low == -4095)
7972 return false;
7975 else
7976 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7979 else
7980 return false;
7982 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7983 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7984 - (unsigned HOST_WIDE_INT) 0x80000000);
7985 /* Check for overflow or zero */
7986 if (low == 0 || high == 0 || (high + low != val))
7987 return false;
7989 /* Reload the high part into a base reg; leave the low part
7990 in the mem.
7991 Note that replacing this gen_rtx_PLUS with plus_constant is
7992 wrong in this case because we rely on the
7993 (plus (plus reg c1) c2) structure being preserved so that
7994 XEXP (*p, 0) in push_reload below uses the correct term. */
7995 *p = gen_rtx_PLUS (GET_MODE (*p),
7996 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7997 GEN_INT (high)),
7998 GEN_INT (low));
7999 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8000 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8001 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8002 return true;
8005 return false;
8009 thumb_legitimize_reload_address (rtx *x_p,
8010 machine_mode mode,
8011 int opnum, int type,
8012 int ind_levels ATTRIBUTE_UNUSED)
8014 rtx x = *x_p;
8016 if (GET_CODE (x) == PLUS
8017 && GET_MODE_SIZE (mode) < 4
8018 && REG_P (XEXP (x, 0))
8019 && XEXP (x, 0) == stack_pointer_rtx
8020 && CONST_INT_P (XEXP (x, 1))
8021 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8023 rtx orig_x = x;
8025 x = copy_rtx (x);
8026 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8027 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8028 return x;
8031 /* If both registers are hi-regs, then it's better to reload the
8032 entire expression rather than each register individually. That
8033 only requires one reload register rather than two. */
8034 if (GET_CODE (x) == PLUS
8035 && REG_P (XEXP (x, 0))
8036 && REG_P (XEXP (x, 1))
8037 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8038 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8040 rtx orig_x = x;
8042 x = copy_rtx (x);
8043 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8044 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8045 return x;
8048 return NULL;
8051 /* Return TRUE if X contains any TLS symbol references. */
8053 bool
8054 arm_tls_referenced_p (rtx x)
8056 if (! TARGET_HAVE_TLS)
8057 return false;
8059 subrtx_iterator::array_type array;
8060 FOR_EACH_SUBRTX (iter, array, x, ALL)
8062 const_rtx x = *iter;
8063 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8064 return true;
8066 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8067 TLS offsets, not real symbol references. */
8068 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8069 iter.skip_subrtxes ();
8071 return false;
8074 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8076 On the ARM, allow any integer (invalid ones are removed later by insn
8077 patterns), nice doubles and symbol_refs which refer to the function's
8078 constant pool XXX.
8080 When generating PIC, allow anything. */
8082 static bool
8083 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8085 /* At present, we have no support for Neon structure constants, so forbid
8086 them here. It might be possible to handle simple cases like 0 and -1
8087 in future. */
8088 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8089 return false;
8091 return flag_pic || !label_mentioned_p (x);
8094 static bool
8095 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8097 return (CONST_INT_P (x)
8098 || CONST_DOUBLE_P (x)
8099 || CONSTANT_ADDRESS_P (x)
8100 || flag_pic);
8103 static bool
8104 arm_legitimate_constant_p (machine_mode mode, rtx x)
8106 return (!arm_cannot_force_const_mem (mode, x)
8107 && (TARGET_32BIT
8108 ? arm_legitimate_constant_p_1 (mode, x)
8109 : thumb_legitimate_constant_p (mode, x)));
8112 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8114 static bool
8115 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8117 rtx base, offset;
8119 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8121 split_const (x, &base, &offset);
8122 if (GET_CODE (base) == SYMBOL_REF
8123 && !offset_within_block_p (base, INTVAL (offset)))
8124 return true;
8126 return arm_tls_referenced_p (x);
8129 #define REG_OR_SUBREG_REG(X) \
8130 (REG_P (X) \
8131 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8133 #define REG_OR_SUBREG_RTX(X) \
8134 (REG_P (X) ? (X) : SUBREG_REG (X))
8136 static inline int
8137 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8139 machine_mode mode = GET_MODE (x);
8140 int total, words;
8142 switch (code)
8144 case ASHIFT:
8145 case ASHIFTRT:
8146 case LSHIFTRT:
8147 case ROTATERT:
8148 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8150 case PLUS:
8151 case MINUS:
8152 case COMPARE:
8153 case NEG:
8154 case NOT:
8155 return COSTS_N_INSNS (1);
8157 case MULT:
8158 if (CONST_INT_P (XEXP (x, 1)))
8160 int cycles = 0;
8161 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8163 while (i)
8165 i >>= 2;
8166 cycles++;
8168 return COSTS_N_INSNS (2) + cycles;
8170 return COSTS_N_INSNS (1) + 16;
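/* Rough example of the cycle estimate above: for a multiply by the
   constant 19 (binary 10011) the loop shifts right by two three times
   before reaching zero, so the cost is COSTS_N_INSNS (2) plus 3,
   i.e. roughly one cycle per two bits of the constant.  */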
8172 case SET:
8173 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8174 the mode. */
8175 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8176 return (COSTS_N_INSNS (words)
8177 + 4 * ((MEM_P (SET_SRC (x)))
8178 + MEM_P (SET_DEST (x))));
8180 case CONST_INT:
8181 if (outer == SET)
8183 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8184 return 0;
8185 if (thumb_shiftable_const (INTVAL (x)))
8186 return COSTS_N_INSNS (2);
8187 return COSTS_N_INSNS (3);
8189 else if ((outer == PLUS || outer == COMPARE)
8190 && INTVAL (x) < 256 && INTVAL (x) > -256)
8191 return 0;
8192 else if ((outer == IOR || outer == XOR || outer == AND)
8193 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8194 return COSTS_N_INSNS (1);
8195 else if (outer == AND)
8197 int i;
8198 /* This duplicates the tests in the andsi3 expander. */
8199 for (i = 9; i <= 31; i++)
8200 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8201 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8202 return COSTS_N_INSNS (2);
8204 else if (outer == ASHIFT || outer == ASHIFTRT
8205 || outer == LSHIFTRT)
8206 return 0;
8207 return COSTS_N_INSNS (2);
8209 case CONST:
8210 case CONST_DOUBLE:
8211 case LABEL_REF:
8212 case SYMBOL_REF:
8213 return COSTS_N_INSNS (3);
8215 case UDIV:
8216 case UMOD:
8217 case DIV:
8218 case MOD:
8219 return 100;
8221 case TRUNCATE:
8222 return 99;
8224 case AND:
8225 case XOR:
8226 case IOR:
8227 /* XXX guess. */
8228 return 8;
8230 case MEM:
8231 /* XXX another guess. */
8232 /* Memory costs quite a lot for the first word, but subsequent words
8233 load at the equivalent of a single insn each. */
8234 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8235 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8236 ? 4 : 0));
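/* Worked example, assuming UNITS_PER_WORD is 4 on this target: an
   SImode load costs 10, a DImode load costs 10 + 4 = 14, and either
   costs an extra 4 if the address is a constant-pool SYMBOL_REF.  */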
8238 case IF_THEN_ELSE:
8239 /* XXX a guess. */
8240 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8241 return 14;
8242 return 2;
8244 case SIGN_EXTEND:
8245 case ZERO_EXTEND:
8246 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8247 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8249 if (mode == SImode)
8250 return total;
8252 if (arm_arch6)
8253 return total + COSTS_N_INSNS (1);
8255 /* Assume a two-shift sequence. Increase the cost slightly so
8256 we prefer actual shifts over an extend operation. */
8257 return total + 1 + COSTS_N_INSNS (2);
8259 default:
8260 return 99;
8264 static inline bool
8265 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8267 machine_mode mode = GET_MODE (x);
8268 enum rtx_code subcode;
8269 rtx operand;
8270 enum rtx_code code = GET_CODE (x);
8271 *total = 0;
8273 switch (code)
8275 case MEM:
8276 /* Memory costs quite a lot for the first word, but subsequent words
8277 load at the equivalent of a single insn each. */
8278 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8279 return true;
8281 case DIV:
8282 case MOD:
8283 case UDIV:
8284 case UMOD:
8285 if (TARGET_HARD_FLOAT && mode == SFmode)
8286 *total = COSTS_N_INSNS (2);
8287 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8288 *total = COSTS_N_INSNS (4);
8289 else
8290 *total = COSTS_N_INSNS (20);
8291 return false;
8293 case ROTATE:
8294 if (REG_P (XEXP (x, 1)))
8295 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8296 else if (!CONST_INT_P (XEXP (x, 1)))
8297 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8299 /* Fall through */
8300 case ROTATERT:
8301 if (mode != SImode)
8303 *total += COSTS_N_INSNS (4);
8304 return true;
8307 /* Fall through */
8308 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8309 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8310 if (mode == DImode)
8312 *total += COSTS_N_INSNS (3);
8313 return true;
8316 *total += COSTS_N_INSNS (1);
8317 /* Increase the cost of complex shifts because they aren't any faster
8318 and they reduce dual-issue opportunities. */
8319 if (arm_tune_cortex_a9
8320 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8321 ++*total;
8323 return true;
8325 case MINUS:
8326 if (mode == DImode)
8328 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8329 if (CONST_INT_P (XEXP (x, 0))
8330 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8332 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8333 return true;
8336 if (CONST_INT_P (XEXP (x, 1))
8337 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8339 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8340 return true;
8343 return false;
8346 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8348 if (TARGET_HARD_FLOAT
8349 && (mode == SFmode
8350 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8352 *total = COSTS_N_INSNS (1);
8353 if (CONST_DOUBLE_P (XEXP (x, 0))
8354 && arm_const_double_rtx (XEXP (x, 0)))
8356 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8357 return true;
8360 if (CONST_DOUBLE_P (XEXP (x, 1))
8361 && arm_const_double_rtx (XEXP (x, 1)))
8363 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8364 return true;
8367 return false;
8369 *total = COSTS_N_INSNS (20);
8370 return false;
8373 *total = COSTS_N_INSNS (1);
8374 if (CONST_INT_P (XEXP (x, 0))
8375 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8377 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8378 return true;
8381 subcode = GET_CODE (XEXP (x, 1));
8382 if (subcode == ASHIFT || subcode == ASHIFTRT
8383 || subcode == LSHIFTRT
8384 || subcode == ROTATE || subcode == ROTATERT)
8386 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8387 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8388 return true;
8391 /* A shift as a part of RSB costs no more than RSB itself. */
8392 if (GET_CODE (XEXP (x, 0)) == MULT
8393 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8395 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8396 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8397 return true;
8400 if (subcode == MULT
8401 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8403 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8404 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8405 return true;
8408 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8409 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8411 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8412 if (REG_P (XEXP (XEXP (x, 1), 0))
8413 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8414 *total += COSTS_N_INSNS (1);
8416 return true;
8419 /* Fall through */
8421 case PLUS:
8422 if (code == PLUS && arm_arch6 && mode == SImode
8423 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8424 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8426 *total = COSTS_N_INSNS (1);
8427 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8428 0, speed);
8429 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8430 return true;
8433 /* MLA: All arguments must be registers. We filter out
8434 multiplication by a power of two, so that we fall through to
8435 the code below. */
8436 if (GET_CODE (XEXP (x, 0)) == MULT
8437 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8439 /* The cost comes from the cost of the multiply. */
8440 return false;
8443 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8445 if (TARGET_HARD_FLOAT
8446 && (mode == SFmode
8447 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8449 *total = COSTS_N_INSNS (1);
8450 if (CONST_DOUBLE_P (XEXP (x, 1))
8451 && arm_const_double_rtx (XEXP (x, 1)))
8453 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8454 return true;
8457 return false;
8460 *total = COSTS_N_INSNS (20);
8461 return false;
8464 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8465 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8467 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8468 if (REG_P (XEXP (XEXP (x, 0), 0))
8469 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8470 *total += COSTS_N_INSNS (1);
8471 return true;
8474 /* Fall through */
8476 case AND: case XOR: case IOR:
8478 /* Normally the frame registers will be split into reg+const during
8479 reload, so it is a bad idea to combine them with other instructions,
8480 since then they might not be moved outside of loops. As a compromise
8481 we allow integration with ops that have a constant as their second
8482 operand. */
8483 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8484 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8485 && !CONST_INT_P (XEXP (x, 1)))
8486 *total = COSTS_N_INSNS (1);
8488 if (mode == DImode)
8490 *total += COSTS_N_INSNS (2);
8491 if (CONST_INT_P (XEXP (x, 1))
8492 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8494 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8495 return true;
8498 return false;
8501 *total += COSTS_N_INSNS (1);
8502 if (CONST_INT_P (XEXP (x, 1))
8503 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8505 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8508 subcode = GET_CODE (XEXP (x, 0));
8509 if (subcode == ASHIFT || subcode == ASHIFTRT
8510 || subcode == LSHIFTRT
8511 || subcode == ROTATE || subcode == ROTATERT)
8513 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8514 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8515 return true;
8518 if (subcode == MULT
8519 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8521 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8522 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8523 return true;
8526 if (subcode == UMIN || subcode == UMAX
8527 || subcode == SMIN || subcode == SMAX)
8529 *total = COSTS_N_INSNS (3);
8530 return true;
8533 return false;
8535 case MULT:
8536 /* This should have been handled by the CPU specific routines. */
8537 gcc_unreachable ();
8539 case TRUNCATE:
8540 if (arm_arch3m && mode == SImode
8541 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8542 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8543 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8544 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8545 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8546 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8548 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8549 return true;
8551 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8552 return false;
8554 case NEG:
8555 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8557 if (TARGET_HARD_FLOAT
8558 && (mode == SFmode
8559 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8561 *total = COSTS_N_INSNS (1);
8562 return false;
8564 *total = COSTS_N_INSNS (2);
8565 return false;
8568 /* Fall through */
8569 case NOT:
8570 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8571 if (mode == SImode && code == NOT)
8573 subcode = GET_CODE (XEXP (x, 0));
8574 if (subcode == ASHIFT || subcode == ASHIFTRT
8575 || subcode == LSHIFTRT
8576 || subcode == ROTATE || subcode == ROTATERT
8577 || (subcode == MULT
8578 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8580 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8581 /* Register shifts cost an extra cycle. */
8582 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8583 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8584 subcode, 1, speed);
8585 return true;
8589 return false;
8591 case IF_THEN_ELSE:
8592 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8594 *total = COSTS_N_INSNS (4);
8595 return true;
8598 operand = XEXP (x, 0);
8600 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8601 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8602 && REG_P (XEXP (operand, 0))
8603 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8604 *total += COSTS_N_INSNS (1);
8605 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8606 + rtx_cost (XEXP (x, 2), code, 2, speed));
8607 return true;
8609 case NE:
8610 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8612 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8613 return true;
8615 goto scc_insn;
8617 case GE:
8618 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8619 && mode == SImode && XEXP (x, 1) == const0_rtx)
8621 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8622 return true;
8624 goto scc_insn;
8626 case LT:
8627 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8628 && mode == SImode && XEXP (x, 1) == const0_rtx)
8630 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8631 return true;
8633 goto scc_insn;
8635 case EQ:
8636 case GT:
8637 case LE:
8638 case GEU:
8639 case LTU:
8640 case GTU:
8641 case LEU:
8642 case UNORDERED:
8643 case ORDERED:
8644 case UNEQ:
8645 case UNGE:
8646 case UNLT:
8647 case UNGT:
8648 case UNLE:
8649 scc_insn:
8650 /* SCC insns. In the case where the comparison has already been
8651 performed, then they cost 2 instructions. Otherwise they need
8652 an additional comparison before them. */
8653 *total = COSTS_N_INSNS (2);
8654 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8656 return true;
8659 /* Fall through */
8660 case COMPARE:
8661 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8663 *total = 0;
8664 return true;
8667 *total += COSTS_N_INSNS (1);
8668 if (CONST_INT_P (XEXP (x, 1))
8669 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8671 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8672 return true;
8675 subcode = GET_CODE (XEXP (x, 0));
8676 if (subcode == ASHIFT || subcode == ASHIFTRT
8677 || subcode == LSHIFTRT
8678 || subcode == ROTATE || subcode == ROTATERT)
8680 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8681 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8682 return true;
8685 if (subcode == MULT
8686 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8688 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8689 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8690 return true;
8693 return false;
8695 case UMIN:
8696 case UMAX:
8697 case SMIN:
8698 case SMAX:
8699 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8700 if (!CONST_INT_P (XEXP (x, 1))
8701 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8702 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8703 return true;
8705 case ABS:
8706 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8708 if (TARGET_HARD_FLOAT
8709 && (mode == SFmode
8710 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8712 *total = COSTS_N_INSNS (1);
8713 return false;
8715 *total = COSTS_N_INSNS (20);
8716 return false;
8718 *total = COSTS_N_INSNS (1);
8719 if (mode == DImode)
8720 *total += COSTS_N_INSNS (3);
8721 return false;
8723 case SIGN_EXTEND:
8724 case ZERO_EXTEND:
8725 *total = 0;
8726 if (GET_MODE_CLASS (mode) == MODE_INT)
8728 rtx op = XEXP (x, 0);
8729 machine_mode opmode = GET_MODE (op);
8731 if (mode == DImode)
8732 *total += COSTS_N_INSNS (1);
8734 if (opmode != SImode)
8736 if (MEM_P (op))
8738 /* If !arm_arch4, we use one of the extendhisi2_mem
8739 or movhi_bytes patterns for HImode. For a QImode
8740 sign extension, we first zero-extend from memory
8741 and then perform a shift sequence. */
8742 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8743 *total += COSTS_N_INSNS (2);
8745 else if (arm_arch6)
8746 *total += COSTS_N_INSNS (1);
8748 /* We don't have the necessary insn, so we need to perform some
8749 other operation. */
8750 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8751 /* An and with constant 255. */
8752 *total += COSTS_N_INSNS (1);
8753 else
8754 /* A shift sequence. Increase costs slightly to avoid
8755 combining two shifts into an extend operation. */
8756 *total += COSTS_N_INSNS (2) + 1;
8759 return false;
8762 switch (GET_MODE (XEXP (x, 0)))
8764 case V8QImode:
8765 case V4HImode:
8766 case V2SImode:
8767 case V4QImode:
8768 case V2HImode:
8769 *total = COSTS_N_INSNS (1);
8770 return false;
8772 default:
8773 gcc_unreachable ();
8775 gcc_unreachable ();
8777 case ZERO_EXTRACT:
8778 case SIGN_EXTRACT:
8779 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8780 return true;
8782 case CONST_INT:
8783 if (const_ok_for_arm (INTVAL (x))
8784 || const_ok_for_arm (~INTVAL (x)))
8785 *total = COSTS_N_INSNS (1);
8786 else
8787 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8788 INTVAL (x), NULL_RTX,
8789 NULL_RTX, 0, 0));
8790 return true;
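/* For example, 0xff000000 is a valid ARM immediate and 0xffffff00 is
   valid once inverted, so both cost a single insn; something like
   0x00ff00ff is neither, so the cost is whatever instruction count
   arm_gen_constant reports for synthesizing it (typically two insns
   here, e.g. a MOV followed by an ORR).  */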
8792 case CONST:
8793 case LABEL_REF:
8794 case SYMBOL_REF:
8795 *total = COSTS_N_INSNS (3);
8796 return true;
8798 case HIGH:
8799 *total = COSTS_N_INSNS (1);
8800 return true;
8802 case LO_SUM:
8803 *total = COSTS_N_INSNS (1);
8804 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8805 return true;
8807 case CONST_DOUBLE:
8808 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8809 && (mode == SFmode || !TARGET_VFP_SINGLE))
8810 *total = COSTS_N_INSNS (1);
8811 else
8812 *total = COSTS_N_INSNS (4);
8813 return true;
8815 case SET:
8816 /* The vec_extract patterns accept memory operands that require an
8817 address reload. Account for the cost of that reload to give the
8818 auto-inc-dec pass an incentive to try to replace them. */
8819 if (TARGET_NEON && MEM_P (SET_DEST (x))
8820 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8822 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8823 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8824 *total += COSTS_N_INSNS (1);
8825 return true;
8827 /* Likewise for the vec_set patterns. */
8828 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8829 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8830 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8832 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8833 *total = rtx_cost (mem, code, 0, speed);
8834 if (!neon_vector_mem_operand (mem, 2, true))
8835 *total += COSTS_N_INSNS (1);
8836 return true;
8838 return false;
8840 case UNSPEC:
8841 /* Cost this as high as a memory access, so that it can still be
8842 hoisted out of loops. */
8843 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8845 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8847 return true;
8849 case CONST_VECTOR:
8850 if (TARGET_NEON
8851 && TARGET_HARD_FLOAT
8852 && outer == SET
8853 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8854 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8855 *total = COSTS_N_INSNS (1);
8856 else
8857 *total = COSTS_N_INSNS (4);
8858 return true;
8860 default:
8861 *total = COSTS_N_INSNS (4);
8862 return false;
8866 /* Estimates the size cost of thumb1 instructions.
8867 For now most of the code is copied from thumb1_rtx_costs. We need
8868 finer-grained tuning when we have more related test cases. */
8869 static inline int
8870 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8872 machine_mode mode = GET_MODE (x);
8873 int words;
8875 switch (code)
8877 case ASHIFT:
8878 case ASHIFTRT:
8879 case LSHIFTRT:
8880 case ROTATERT:
8881 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8883 case PLUS:
8884 case MINUS:
8885 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8886 defined by RTL expansion, especially for the expansion of
8887 multiplication. */
8888 if ((GET_CODE (XEXP (x, 0)) == MULT
8889 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8890 || (GET_CODE (XEXP (x, 1)) == MULT
8891 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8892 return COSTS_N_INSNS (2);
8893 /* Deliberately fall through for normal RTXs. */
8894 case COMPARE:
8895 case NEG:
8896 case NOT:
8897 return COSTS_N_INSNS (1);
8899 case MULT:
8900 if (CONST_INT_P (XEXP (x, 1)))
8902 /* The Thumb1 mul instruction can't operate on a constant; we must
8903 load it into a register first. */
8904 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8905 /* For the targets which have a very small and high-latency multiply
8906 unit, we prefer to synthesize the mult with up to 5 instructions,
8907 giving a good balance between size and performance. */
8908 if (arm_arch6m && arm_m_profile_small_mul)
8909 return COSTS_N_INSNS (5);
8910 else
8911 return COSTS_N_INSNS (1) + const_size;
8913 return COSTS_N_INSNS (1);
8915 case SET:
8916 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8917 the mode. */
8918 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8919 return COSTS_N_INSNS (words)
8920 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8921 || satisfies_constraint_K (SET_SRC (x))
8922 /* thumb1_movdi_insn. */
8923 || ((words > 1) && MEM_P (SET_SRC (x))));
8925 case CONST_INT:
8926 if (outer == SET)
8928 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8929 return COSTS_N_INSNS (1);
8930 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8931 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8932 return COSTS_N_INSNS (2);
8933 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8934 if (thumb_shiftable_const (INTVAL (x)))
8935 return COSTS_N_INSNS (2);
8936 return COSTS_N_INSNS (3);
8938 else if ((outer == PLUS || outer == COMPARE)
8939 && INTVAL (x) < 256 && INTVAL (x) > -256)
8940 return 0;
8941 else if ((outer == IOR || outer == XOR || outer == AND)
8942 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8943 return COSTS_N_INSNS (1);
8944 else if (outer == AND)
8946 int i;
8947 /* This duplicates the tests in the andsi3 expander. */
8948 for (i = 9; i <= 31; i++)
8949 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8950 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8951 return COSTS_N_INSNS (2);
8953 else if (outer == ASHIFT || outer == ASHIFTRT
8954 || outer == LSHIFTRT)
8955 return 0;
8956 return COSTS_N_INSNS (2);
8958 case CONST:
8959 case CONST_DOUBLE:
8960 case LABEL_REF:
8961 case SYMBOL_REF:
8962 return COSTS_N_INSNS (3);
8964 case UDIV:
8965 case UMOD:
8966 case DIV:
8967 case MOD:
8968 return 100;
8970 case TRUNCATE:
8971 return 99;
8973 case AND:
8974 case XOR:
8975 case IOR:
8976 return COSTS_N_INSNS (1);
8978 case MEM:
8979 return (COSTS_N_INSNS (1)
8980 + COSTS_N_INSNS (1)
8981 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8982 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8983 ? COSTS_N_INSNS (1) : 0));
8985 case IF_THEN_ELSE:
8986 /* XXX a guess. */
8987 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8988 return 14;
8989 return 2;
8991 case ZERO_EXTEND:
8992 /* XXX still guessing. */
8993 switch (GET_MODE (XEXP (x, 0)))
8995 case QImode:
8996 return (1 + (mode == DImode ? 4 : 0)
8997 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8999 case HImode:
9000 return (4 + (mode == DImode ? 4 : 0)
9001 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9003 case SImode:
9004 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9006 default:
9007 return 99;
9010 default:
9011 return 99;
9015 /* RTX costs when optimizing for size. */
9016 static bool
9017 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9018 int *total)
9020 machine_mode mode = GET_MODE (x);
9021 if (TARGET_THUMB1)
9023 *total = thumb1_size_rtx_costs (x, code, outer_code);
9024 return true;
9027 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9028 switch (code)
9030 case MEM:
9031 /* A memory access costs 1 insn if the mode is small or the address is
9032 a single register; otherwise it costs one insn per word. */
9033 if (REG_P (XEXP (x, 0)))
9034 *total = COSTS_N_INSNS (1);
9035 else if (flag_pic
9036 && GET_CODE (XEXP (x, 0)) == PLUS
9037 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9038 /* This will be split into two instructions.
9039 See arm.md:calculate_pic_address. */
9040 *total = COSTS_N_INSNS (2);
9041 else
9042 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9043 return true;
9045 case DIV:
9046 case MOD:
9047 case UDIV:
9048 case UMOD:
9049 /* Needs a libcall, so it costs about this. */
9050 *total = COSTS_N_INSNS (2);
9051 return false;
9053 case ROTATE:
9054 if (mode == SImode && REG_P (XEXP (x, 1)))
9056 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9057 return true;
9059 /* Fall through */
9060 case ROTATERT:
9061 case ASHIFT:
9062 case LSHIFTRT:
9063 case ASHIFTRT:
9064 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9066 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9067 return true;
9069 else if (mode == SImode)
9071 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9072 /* Slightly disparage register shifts, but not by much. */
9073 if (!CONST_INT_P (XEXP (x, 1)))
9074 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9075 return true;
9078 /* Needs a libcall. */
9079 *total = COSTS_N_INSNS (2);
9080 return false;
9082 case MINUS:
9083 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9084 && (mode == SFmode || !TARGET_VFP_SINGLE))
9086 *total = COSTS_N_INSNS (1);
9087 return false;
9090 if (mode == SImode)
9092 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9093 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9095 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9096 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9097 || subcode1 == ROTATE || subcode1 == ROTATERT
9098 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9099 || subcode1 == ASHIFTRT)
9101 /* It's just the cost of the two operands. */
9102 *total = 0;
9103 return false;
9106 *total = COSTS_N_INSNS (1);
9107 return false;
9110 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9111 return false;
9113 case PLUS:
9114 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9115 && (mode == SFmode || !TARGET_VFP_SINGLE))
9117 *total = COSTS_N_INSNS (1);
9118 return false;
9121 /* A shift as a part of ADD costs nothing. */
9122 if (GET_CODE (XEXP (x, 0)) == MULT
9123 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9125 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9126 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9127 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9128 return true;
9131 /* Fall through */
9132 case AND: case XOR: case IOR:
9133 if (mode == SImode)
9135 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9137 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9138 || subcode == LSHIFTRT || subcode == ASHIFTRT
9139 || (code == AND && subcode == NOT))
9141 /* It's just the cost of the two operands. */
9142 *total = 0;
9143 return false;
9147 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9148 return false;
9150 case MULT:
9151 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9152 return false;
9154 case NEG:
9155 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9156 && (mode == SFmode || !TARGET_VFP_SINGLE))
9158 *total = COSTS_N_INSNS (1);
9159 return false;
9162 /* Fall through */
9163 case NOT:
9164 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9166 return false;
9168 case IF_THEN_ELSE:
9169 *total = 0;
9170 return false;
9172 case COMPARE:
9173 if (cc_register (XEXP (x, 0), VOIDmode))
9174 * total = 0;
9175 else
9176 *total = COSTS_N_INSNS (1);
9177 return false;
9179 case ABS:
9180 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9181 && (mode == SFmode || !TARGET_VFP_SINGLE))
9182 *total = COSTS_N_INSNS (1);
9183 else
9184 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9185 return false;
9187 case SIGN_EXTEND:
9188 case ZERO_EXTEND:
9189 return arm_rtx_costs_1 (x, outer_code, total, 0);
9191 case CONST_INT:
9192 if (const_ok_for_arm (INTVAL (x)))
9193 /* A multiplication by a constant requires another instruction
9194 to load the constant to a register. */
9195 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9196 ? 1 : 0);
9197 else if (const_ok_for_arm (~INTVAL (x)))
9198 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9199 else if (const_ok_for_arm (-INTVAL (x)))
9201 if (outer_code == COMPARE || outer_code == PLUS
9202 || outer_code == MINUS)
9203 *total = 0;
9204 else
9205 *total = COSTS_N_INSNS (1);
9207 else
9208 *total = COSTS_N_INSNS (2);
9209 return true;
9211 case CONST:
9212 case LABEL_REF:
9213 case SYMBOL_REF:
9214 *total = COSTS_N_INSNS (2);
9215 return true;
9217 case CONST_DOUBLE:
9218 *total = COSTS_N_INSNS (4);
9219 return true;
9221 case CONST_VECTOR:
9222 if (TARGET_NEON
9223 && TARGET_HARD_FLOAT
9224 && outer_code == SET
9225 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9226 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9227 *total = COSTS_N_INSNS (1);
9228 else
9229 *total = COSTS_N_INSNS (4);
9230 return true;
9232 case HIGH:
9233 case LO_SUM:
9234 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9235 cost of these slightly. */
9236 *total = COSTS_N_INSNS (1) + 1;
9237 return true;
9239 case SET:
9240 return false;
9242 default:
9243 if (mode != VOIDmode)
9244 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9245 else
9246 *total = COSTS_N_INSNS (4); /* Who knows? */
9247 return false;
9251 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9252 operand, then return the operand that is being shifted. If the shift
9253 is not by a constant, then set SHIFT_REG to point to the operand.
9254 Return NULL if OP is not a shifter operand. */
9255 static rtx
9256 shifter_op_p (rtx op, rtx *shift_reg)
9258 enum rtx_code code = GET_CODE (op);
9260 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9261 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9262 return XEXP (op, 0);
9263 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9264 return XEXP (op, 0);
9265 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9266 || code == ASHIFTRT)
9268 if (!CONST_INT_P (XEXP (op, 1)))
9269 *shift_reg = XEXP (op, 1);
9270 return XEXP (op, 0);
9273 return NULL;
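/* For instance, given (mult (reg A) (const_int 8)) this returns A and
   treats it as a left shift by three, while (ashift (reg A) (reg B))
   returns A and records B in *SHIFT_REG; (mult (reg A) (const_int 6))
   is rejected because 6 is not a power of two.  */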
9276 static bool
9277 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9279 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9280 gcc_assert (GET_CODE (x) == UNSPEC);
9282 switch (XINT (x, 1))
9284 case UNSPEC_UNALIGNED_LOAD:
9285 /* We can only do unaligned loads into the integer unit, and we can't
9286 use LDM or LDRD. */
9287 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9288 if (speed_p)
9289 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9290 + extra_cost->ldst.load_unaligned);
9292 #ifdef NOT_YET
9293 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9294 ADDR_SPACE_GENERIC, speed_p);
9295 #endif
9296 return true;
9298 case UNSPEC_UNALIGNED_STORE:
9299 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9300 if (speed_p)
9301 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9302 + extra_cost->ldst.store_unaligned);
9304 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9305 #ifdef NOT_YET
9306 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9307 ADDR_SPACE_GENERIC, speed_p);
9308 #endif
9309 return true;
9311 case UNSPEC_VRINTZ:
9312 case UNSPEC_VRINTP:
9313 case UNSPEC_VRINTM:
9314 case UNSPEC_VRINTR:
9315 case UNSPEC_VRINTX:
9316 case UNSPEC_VRINTA:
9317 *cost = COSTS_N_INSNS (1);
9318 if (speed_p)
9319 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9321 return true;
9322 default:
9323 *cost = COSTS_N_INSNS (2);
9324 break;
9326 return false;
9329 /* Cost of a libcall. We assume one insn per argument, an amount for the
9330 call (one insn for -Os) and then one for processing the result. */
9331 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
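/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) at -Os, i.e. two argument
   insns plus the assumed overhead of the call and its result.  */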
9333 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9334 do \
9336 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9337 if (shift_op != NULL \
9338 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9340 if (shift_reg) \
9342 if (speed_p) \
9343 *cost += extra_cost->alu.arith_shift_reg; \
9344 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9346 else if (speed_p) \
9347 *cost += extra_cost->alu.arith_shift; \
9349 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9350 + rtx_cost (XEXP (x, 1 - IDX), \
9351 OP, 1, speed_p)); \
9352 return true; \
9355 while (0);
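/* A sketch of how the macro above is used: for a narrow-mode RTX such as
   (plus (ashift (reg A) (const_int 1)) (reg B)) with IDX 0,
   shifter_op_p returns A with SHIFT_REG left null, so (assuming the
   shift is accepted as a left shift) the macro adds the arith_shift
   cost when optimizing for speed plus the costs of A and B, and then
   returns true from the cost function.  */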
9357 /* RTX costs. Make an estimate of the cost of executing the operation
9358 X, which is contained with an operation with code OUTER_CODE.
9359 SPEED_P indicates whether the cost desired is the performance cost,
9360 or the size cost. The estimate is stored in COST and the return
9361 value is TRUE if the cost calculation is final, or FALSE if the
9362 caller should recurse through the operands of X to add additional
9363 costs.
9365 We currently make no attempt to model the size savings of Thumb-2
9366 16-bit instructions. At the normal points in compilation where
9367 this code is called we have no measure of whether the condition
9368 flags are live or not, and thus no realistic way to determine what
9369 the size will eventually be. */
9370 static bool
9371 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9372 const struct cpu_cost_table *extra_cost,
9373 int *cost, bool speed_p)
9375 machine_mode mode = GET_MODE (x);
9377 if (TARGET_THUMB1)
9379 if (speed_p)
9380 *cost = thumb1_rtx_costs (x, code, outer_code);
9381 else
9382 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9383 return true;
9386 switch (code)
9388 case SET:
9389 *cost = 0;
9390 /* SET RTXs don't have a mode so we get it from the destination. */
9391 mode = GET_MODE (SET_DEST (x));
9393 if (REG_P (SET_SRC (x))
9394 && REG_P (SET_DEST (x)))
9396 /* Assume that most copies can be done with a single insn,
9397 unless we don't have HW FP, in which case everything
9398 larger than word mode will require two insns. */
9399 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9400 && GET_MODE_SIZE (mode) > 4)
9401 || mode == DImode)
9402 ? 2 : 1);
9403 /* Conditional register moves can be encoded
9404 in 16 bits in Thumb mode. */
9405 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9406 *cost >>= 1;
9408 return true;
9411 if (CONST_INT_P (SET_SRC (x)))
9413 /* Handle CONST_INT here, since the value doesn't have a mode
9414 and we would otherwise be unable to work out the true cost. */
9415 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9416 outer_code = SET;
9417 /* Slightly lower the cost of setting a core reg to a constant.
9418 This helps break up chains and allows for better scheduling. */
9419 if (REG_P (SET_DEST (x))
9420 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9421 *cost -= 1;
9422 x = SET_SRC (x);
9423 /* Immediate moves with an immediate in the range [0, 255] can be
9424 encoded in 16 bits in Thumb mode. */
9425 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9426 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9427 *cost >>= 1;
9428 goto const_int_cost;
9431 return false;
9433 case MEM:
9434 /* A memory access costs 1 insn if the mode is small or the address is
9435 a single register; otherwise it costs one insn per word. */
9436 if (REG_P (XEXP (x, 0)))
9437 *cost = COSTS_N_INSNS (1);
9438 else if (flag_pic
9439 && GET_CODE (XEXP (x, 0)) == PLUS
9440 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9441 /* This will be split into two instructions.
9442 See arm.md:calculate_pic_address. */
9443 *cost = COSTS_N_INSNS (2);
9444 else
9445 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9447 /* For speed optimizations, add the costs of the address and
9448 accessing memory. */
9449 if (speed_p)
9450 #ifdef NOT_YET
9451 *cost += (extra_cost->ldst.load
9452 + arm_address_cost (XEXP (x, 0), mode,
9453 ADDR_SPACE_GENERIC, speed_p));
9454 #else
9455 *cost += extra_cost->ldst.load;
9456 #endif
9457 return true;
9459 case PARALLEL:
9461 /* Calculations of LDM costs are complex. We assume an initial cost
9462 (ldm_1st) which will load the number of registers mentioned in
9463 ldm_regs_per_insn_1st registers; then each additional
9464 ldm_regs_per_insn_subsequent registers cost one more insn. The
9465 formula for N regs is thus:
9467 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9468 + ldm_regs_per_insn_subsequent - 1)
9469 / ldm_regs_per_insn_subsequent).
9471 Additional costs may also be added for addressing. A similar
9472 formula is used for STM. */
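/* A hypothetical worked example: with nregs = 5, ldm_regs_per_insn_1st
   = 3 and ldm_regs_per_insn_subsequent = 2, the cost is
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (1), i.e. one extra insn for the two
   registers that do not fit in the first LDM.  */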
9474 bool is_ldm = load_multiple_operation (x, SImode);
9475 bool is_stm = store_multiple_operation (x, SImode);
9477 *cost = COSTS_N_INSNS (1);
9479 if (is_ldm || is_stm)
9481 if (speed_p)
9483 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9484 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9485 ? extra_cost->ldst.ldm_regs_per_insn_1st
9486 : extra_cost->ldst.stm_regs_per_insn_1st;
9487 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9488 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9489 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9491 *cost += regs_per_insn_1st
9492 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9493 + regs_per_insn_sub - 1)
9494 / regs_per_insn_sub);
9495 return true;
9499 return false;
9501 case DIV:
9502 case UDIV:
9503 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9504 && (mode == SFmode || !TARGET_VFP_SINGLE))
9505 *cost = COSTS_N_INSNS (speed_p
9506 ? extra_cost->fp[mode != SFmode].div : 1);
9507 else if (mode == SImode && TARGET_IDIV)
9508 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9509 else
9510 *cost = LIBCALL_COST (2);
9511 return false; /* All arguments must be in registers. */
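/* LIBCALL_COST is a helper macro defined earlier in this file; it approximates the
   overhead of a library call taking the given number of arguments, and is used below
   for every operation that cannot be done inline. */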
9513 case MOD:
9514 case UMOD:
9515 *cost = LIBCALL_COST (2);
9516 return false; /* All arguments must be in registers. */
9518 case ROTATE:
9519 if (mode == SImode && REG_P (XEXP (x, 1)))
9521 *cost = (COSTS_N_INSNS (2)
9522 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9523 if (speed_p)
9524 *cost += extra_cost->alu.shift_reg;
9525 return true;
9527 /* Fall through. */
9528 case ROTATERT:
9529 case ASHIFT:
9530 case LSHIFTRT:
9531 case ASHIFTRT:
9532 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9534 *cost = (COSTS_N_INSNS (3)
9535 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9536 if (speed_p)
9537 *cost += 2 * extra_cost->alu.shift;
9538 return true;
9540 else if (mode == SImode)
9542 *cost = (COSTS_N_INSNS (1)
9543 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9544 /* Slightly disparage register shifts at -Os, but not by much. */
9545 if (!CONST_INT_P (XEXP (x, 1)))
9546 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9547 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9548 return true;
9550 else if (GET_MODE_CLASS (mode) == MODE_INT
9551 && GET_MODE_SIZE (mode) < 4)
9553 if (code == ASHIFT)
9555 *cost = (COSTS_N_INSNS (1)
9556 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9557 /* Slightly disparage register shifts at -Os, but not by
9558 much. */
9559 if (!CONST_INT_P (XEXP (x, 1)))
9560 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9561 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9563 else if (code == LSHIFTRT || code == ASHIFTRT)
9565 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9567 /* Can use SBFX/UBFX. */
9568 *cost = COSTS_N_INSNS (1);
9569 if (speed_p)
9570 *cost += extra_cost->alu.bfx;
9571 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9573 else
9575 *cost = COSTS_N_INSNS (2);
9576 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9577 if (speed_p)
9579 if (CONST_INT_P (XEXP (x, 1)))
9580 *cost += 2 * extra_cost->alu.shift;
9581 else
9582 *cost += (extra_cost->alu.shift
9583 + extra_cost->alu.shift_reg);
9585 else
9586 /* Slightly disparage register shifts. */
9587 *cost += !CONST_INT_P (XEXP (x, 1));
9590 else /* Rotates. */
9592 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9593 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9594 if (speed_p)
9596 if (CONST_INT_P (XEXP (x, 1)))
9597 *cost += (2 * extra_cost->alu.shift
9598 + extra_cost->alu.log_shift);
9599 else
9600 *cost += (extra_cost->alu.shift
9601 + extra_cost->alu.shift_reg
9602 + extra_cost->alu.log_shift_reg);
9605 return true;
9608 *cost = LIBCALL_COST (2);
9609 return false;
9611 case BSWAP:
9612 if (arm_arch6)
9614 if (mode == SImode)
9616 *cost = COSTS_N_INSNS (1);
9617 if (speed_p)
9618 *cost += extra_cost->alu.rev;
9620 return false;
9623 else
9625 /* No rev instruction available. Look at arm_legacy_rev
9626 and thumb_legacy_rev for the form of RTL used then. */
9627 if (TARGET_THUMB)
9629 *cost = COSTS_N_INSNS (10);
9631 if (speed_p)
9633 *cost += 6 * extra_cost->alu.shift;
9634 *cost += 3 * extra_cost->alu.logical;
9637 else
9639 *cost = COSTS_N_INSNS (5);
9641 if (speed_p)
9643 *cost += 2 * extra_cost->alu.shift;
9644 *cost += extra_cost->alu.arith_shift;
9645 *cost += 2 * extra_cost->alu.logical;
9648 return true;
9650 return false;
9652 case MINUS:
9653 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9654 && (mode == SFmode || !TARGET_VFP_SINGLE))
9656 *cost = COSTS_N_INSNS (1);
9657 if (GET_CODE (XEXP (x, 0)) == MULT
9658 || GET_CODE (XEXP (x, 1)) == MULT)
9660 rtx mul_op0, mul_op1, sub_op;
9662 if (speed_p)
9663 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9665 if (GET_CODE (XEXP (x, 0)) == MULT)
9667 mul_op0 = XEXP (XEXP (x, 0), 0);
9668 mul_op1 = XEXP (XEXP (x, 0), 1);
9669 sub_op = XEXP (x, 1);
9671 else
9673 mul_op0 = XEXP (XEXP (x, 1), 0);
9674 mul_op1 = XEXP (XEXP (x, 1), 1);
9675 sub_op = XEXP (x, 0);
9678 /* The first operand of the multiply may be optionally
9679 negated. */
9680 if (GET_CODE (mul_op0) == NEG)
9681 mul_op0 = XEXP (mul_op0, 0);
9683 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9684 + rtx_cost (mul_op1, code, 0, speed_p)
9685 + rtx_cost (sub_op, code, 0, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += extra_cost->fp[mode != SFmode].addsub;
9692 return false;
9695 if (mode == SImode)
9697 rtx shift_by_reg = NULL;
9698 rtx shift_op;
9699 rtx non_shift_op;
9701 *cost = COSTS_N_INSNS (1);
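/* shifter_op_p (defined earlier in this file) returns the operand being shifted when
   its argument can be folded into the ALU insn as a shifter operand, recording the
   shift amount in *shift_by_reg when that amount is held in a register. */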
9703 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9704 if (shift_op == NULL)
9706 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9707 non_shift_op = XEXP (x, 0);
9709 else
9710 non_shift_op = XEXP (x, 1);
9712 if (shift_op != NULL)
9714 if (shift_by_reg != NULL)
9716 if (speed_p)
9717 *cost += extra_cost->alu.arith_shift_reg;
9718 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9720 else if (speed_p)
9721 *cost += extra_cost->alu.arith_shift;
9723 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9724 + rtx_cost (non_shift_op, code, 0, speed_p));
9725 return true;
9728 if (arm_arch_thumb2
9729 && GET_CODE (XEXP (x, 1)) == MULT)
9731 /* MLS. */
9732 if (speed_p)
9733 *cost += extra_cost->mult[0].add;
9734 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9735 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9736 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9737 return true;
9740 if (CONST_INT_P (XEXP (x, 0)))
9742 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9743 INTVAL (XEXP (x, 0)), NULL_RTX,
9744 NULL_RTX, 1, 0);
9745 *cost = COSTS_N_INSNS (insns);
9746 if (speed_p)
9747 *cost += insns * extra_cost->alu.arith;
9748 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9749 return true;
9751 else if (speed_p)
9752 *cost += extra_cost->alu.arith;
9754 return false;
9757 if (GET_MODE_CLASS (mode) == MODE_INT
9758 && GET_MODE_SIZE (mode) < 4)
9760 rtx shift_op, shift_reg;
9761 shift_reg = NULL;
9763 /* We check both sides of the MINUS for shifter operands since,
9764 unlike PLUS, it's not commutative. */
9766 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9767 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
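/* HANDLE_NARROW_SHIFT_ARITH is a helper macro defined earlier in this file (and
   #undef'd after this function, below); it handles the case where the given operand
   of the narrow-mode operation is a shifter operand. */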
9769 /* Slightly disparage, as we might need to widen the result. */
9770 *cost = 1 + COSTS_N_INSNS (1);
9771 if (speed_p)
9772 *cost += extra_cost->alu.arith;
9774 if (CONST_INT_P (XEXP (x, 0)))
9776 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9777 return true;
9780 return false;
9783 if (mode == DImode)
9785 *cost = COSTS_N_INSNS (2);
9787 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9789 rtx op1 = XEXP (x, 1);
9791 if (speed_p)
9792 *cost += 2 * extra_cost->alu.arith;
9794 if (GET_CODE (op1) == ZERO_EXTEND)
9795 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9796 else
9797 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9798 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9799 0, speed_p);
9800 return true;
9802 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9804 if (speed_p)
9805 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9806 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9807 0, speed_p)
9808 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9809 return true;
9811 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9812 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9814 if (speed_p)
9815 *cost += (extra_cost->alu.arith
9816 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9817 ? extra_cost->alu.arith
9818 : extra_cost->alu.arith_shift));
9819 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9820 + rtx_cost (XEXP (XEXP (x, 1), 0),
9821 GET_CODE (XEXP (x, 1)), 0, speed_p));
9822 return true;
9825 if (speed_p)
9826 *cost += 2 * extra_cost->alu.arith;
9827 return false;
9830 /* Vector mode? */
9832 *cost = LIBCALL_COST (2);
9833 return false;
9835 case PLUS:
9836 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9837 && (mode == SFmode || !TARGET_VFP_SINGLE))
9839 *cost = COSTS_N_INSNS (1);
9840 if (GET_CODE (XEXP (x, 0)) == MULT)
9842 rtx mul_op0, mul_op1, add_op;
9844 if (speed_p)
9845 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9847 mul_op0 = XEXP (XEXP (x, 0), 0);
9848 mul_op1 = XEXP (XEXP (x, 0), 1);
9849 add_op = XEXP (x, 1);
9851 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9852 + rtx_cost (mul_op1, code, 0, speed_p)
9853 + rtx_cost (add_op, code, 0, speed_p));
9855 return true;
9858 if (speed_p)
9859 *cost += extra_cost->fp[mode != SFmode].addsub;
9860 return false;
9862 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9864 *cost = LIBCALL_COST (2);
9865 return false;
9868 /* Narrow modes can be synthesized in SImode, but the range
9869 of useful sub-operations is limited. Check for shift operations
9870 on one of the operands. Only left shifts can be used in the
9871 narrow modes. */
9872 if (GET_MODE_CLASS (mode) == MODE_INT
9873 && GET_MODE_SIZE (mode) < 4)
9875 rtx shift_op, shift_reg;
9876 shift_reg = NULL;
9878 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9880 if (CONST_INT_P (XEXP (x, 1)))
9882 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9883 INTVAL (XEXP (x, 1)), NULL_RTX,
9884 NULL_RTX, 1, 0);
9885 *cost = COSTS_N_INSNS (insns);
9886 if (speed_p)
9887 *cost += insns * extra_cost->alu.arith;
9888 /* Slightly penalize a narrow operation as the result may
9889 need widening. */
9890 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9891 return true;
9894 /* Slightly penalize a narrow operation as the result may
9895 need widening. */
9896 *cost = 1 + COSTS_N_INSNS (1);
9897 if (speed_p)
9898 *cost += extra_cost->alu.arith;
9900 return false;
9903 if (mode == SImode)
9905 rtx shift_op, shift_reg;
9907 *cost = COSTS_N_INSNS (1);
9908 if (TARGET_INT_SIMD
9909 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9910 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9912 /* UXTA[BH] or SXTA[BH]. */
9913 if (speed_p)
9914 *cost += extra_cost->alu.extend_arith;
9915 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9916 speed_p)
9917 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9918 return true;
9921 shift_reg = NULL;
9922 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9923 if (shift_op != NULL)
9925 if (shift_reg)
9927 if (speed_p)
9928 *cost += extra_cost->alu.arith_shift_reg;
9929 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9931 else if (speed_p)
9932 *cost += extra_cost->alu.arith_shift;
9934 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9935 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9936 return true;
9938 if (GET_CODE (XEXP (x, 0)) == MULT)
9940 rtx mul_op = XEXP (x, 0);
9942 *cost = COSTS_N_INSNS (1);
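/* Look for a 16x16->32 multiply feeding the add: each multiply operand must be either
   a sign_extend or the top halfword selected with ASHIFTRT #16, the operand forms
   accepted by SMLABB/SMLABT/SMLATB/SMLATT. */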
9944 if (TARGET_DSP_MULTIPLY
9945 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9946 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9947 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9948 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9949 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9950 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9951 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9952 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9953 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9954 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9955 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9956 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9957 == 16))))))
9959 /* SMLA[BT][BT]. */
9960 if (speed_p)
9961 *cost += extra_cost->mult[0].extend_add;
9962 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9963 SIGN_EXTEND, 0, speed_p)
9964 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9965 SIGN_EXTEND, 0, speed_p)
9966 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9967 return true;
9970 if (speed_p)
9971 *cost += extra_cost->mult[0].add;
9972 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9973 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9974 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9975 return true;
9977 if (CONST_INT_P (XEXP (x, 1)))
9979 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9980 INTVAL (XEXP (x, 1)), NULL_RTX,
9981 NULL_RTX, 1, 0);
9982 *cost = COSTS_N_INSNS (insns);
9983 if (speed_p)
9984 *cost += insns * extra_cost->alu.arith;
9985 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9986 return true;
9988 else if (speed_p)
9989 *cost += extra_cost->alu.arith;
9991 return false;
9994 if (mode == DImode)
9996 if (arm_arch3m
9997 && GET_CODE (XEXP (x, 0)) == MULT
9998 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9999 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10000 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10001 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10003 *cost = COSTS_N_INSNS (1);
10004 if (speed_p)
10005 *cost += extra_cost->mult[1].extend_add;
10006 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10007 ZERO_EXTEND, 0, speed_p)
10008 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10009 ZERO_EXTEND, 0, speed_p)
10010 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10011 return true;
10014 *cost = COSTS_N_INSNS (2);
10016 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10017 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10019 if (speed_p)
10020 *cost += (extra_cost->alu.arith
10021 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10022 ? extra_cost->alu.arith
10023 : extra_cost->alu.arith_shift));
10025 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10026 speed_p)
10027 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10028 return true;
10031 if (speed_p)
10032 *cost += 2 * extra_cost->alu.arith;
10033 return false;
10036 /* Vector mode? */
10037 *cost = LIBCALL_COST (2);
10038 return false;
10039 case IOR:
10040 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10042 *cost = COSTS_N_INSNS (1);
10043 if (speed_p)
10044 *cost += extra_cost->alu.rev;
10046 return true;
10048 /* Fall through. */
10049 case AND: case XOR:
10050 if (mode == SImode)
10052 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10053 rtx op0 = XEXP (x, 0);
10054 rtx shift_op, shift_reg;
10056 *cost = COSTS_N_INSNS (1);
10058 if (subcode == NOT
10059 && (code == AND
10060 || (code == IOR && TARGET_THUMB2)))
10061 op0 = XEXP (op0, 0);
10063 shift_reg = NULL;
10064 shift_op = shifter_op_p (op0, &shift_reg);
10065 if (shift_op != NULL)
10067 if (shift_reg)
10069 if (speed_p)
10070 *cost += extra_cost->alu.log_shift_reg;
10071 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10073 else if (speed_p)
10074 *cost += extra_cost->alu.log_shift;
10076 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10077 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10078 return true;
10081 if (CONST_INT_P (XEXP (x, 1)))
10083 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10084 INTVAL (XEXP (x, 1)), NULL_RTX,
10085 NULL_RTX, 1, 0);
10087 *cost = COSTS_N_INSNS (insns);
10088 if (speed_p)
10089 *cost += insns * extra_cost->alu.logical;
10090 *cost += rtx_cost (op0, code, 0, speed_p);
10091 return true;
10094 if (speed_p)
10095 *cost += extra_cost->alu.logical;
10096 *cost += (rtx_cost (op0, code, 0, speed_p)
10097 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10098 return true;
10101 if (mode == DImode)
10103 rtx op0 = XEXP (x, 0);
10104 enum rtx_code subcode = GET_CODE (op0);
10106 *cost = COSTS_N_INSNS (2);
10108 if (subcode == NOT
10109 && (code == AND
10110 || (code == IOR && TARGET_THUMB2)))
10111 op0 = XEXP (op0, 0);
10113 if (GET_CODE (op0) == ZERO_EXTEND)
10115 if (speed_p)
10116 *cost += 2 * extra_cost->alu.logical;
10118 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10119 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10120 return true;
10122 else if (GET_CODE (op0) == SIGN_EXTEND)
10124 if (speed_p)
10125 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10127 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10128 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10129 return true;
10132 if (speed_p)
10133 *cost += 2 * extra_cost->alu.logical;
10135 return true;
10137 /* Vector mode? */
10139 *cost = LIBCALL_COST (2);
10140 return false;
10142 case MULT:
10143 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10144 && (mode == SFmode || !TARGET_VFP_SINGLE))
10146 rtx op0 = XEXP (x, 0);
10148 *cost = COSTS_N_INSNS (1);
10150 if (GET_CODE (op0) == NEG)
10151 op0 = XEXP (op0, 0);
10153 if (speed_p)
10154 *cost += extra_cost->fp[mode != SFmode].mult;
10156 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10157 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10158 return true;
10160 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10162 *cost = LIBCALL_COST (2);
10163 return false;
10166 if (mode == SImode)
10168 *cost = COSTS_N_INSNS (1);
10169 if (TARGET_DSP_MULTIPLY
10170 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10171 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10172 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10173 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10174 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10175 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10176 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10177 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10178 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10179 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10180 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10181 && (INTVAL (XEXP (XEXP (x, 1), 1))
10182 == 16))))))
10184 /* SMUL[TB][TB]. */
10185 if (speed_p)
10186 *cost += extra_cost->mult[0].extend;
10187 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10188 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10189 return true;
10191 if (speed_p)
10192 *cost += extra_cost->mult[0].simple;
10193 return false;
10196 if (mode == DImode)
10198 if (arm_arch3m
10199 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10200 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10201 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10202 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10204 *cost = COSTS_N_INSNS (1);
10205 if (speed_p)
10206 *cost += extra_cost->mult[1].extend;
10207 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10208 ZERO_EXTEND, 0, speed_p)
10209 + rtx_cost (XEXP (XEXP (x, 1), 0),
10210 ZERO_EXTEND, 0, speed_p));
10211 return true;
10214 *cost = LIBCALL_COST (2);
10215 return false;
10218 /* Vector mode? */
10219 *cost = LIBCALL_COST (2);
10220 return false;
10222 case NEG:
10223 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10224 && (mode == SFmode || !TARGET_VFP_SINGLE))
10226 *cost = COSTS_N_INSNS (1);
10227 if (speed_p)
10228 *cost += extra_cost->fp[mode != SFmode].neg;
10230 return false;
10232 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10234 *cost = LIBCALL_COST (1);
10235 return false;
10238 if (mode == SImode)
10240 if (GET_CODE (XEXP (x, 0)) == ABS)
10242 *cost = COSTS_N_INSNS (2);
10243 /* Assume the non-flag-changing variant. */
10244 if (speed_p)
10245 *cost += (extra_cost->alu.log_shift
10246 + extra_cost->alu.arith_shift);
10247 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10248 return true;
10251 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10252 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10254 *cost = COSTS_N_INSNS (2);
10255 /* No extra cost for MOV imm and MVN imm. */
10256 /* If the comparison op is using the flags, there's no further
10257 cost, otherwise we need to add the cost of the comparison. */
10258 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10259 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10260 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10262 *cost += (COSTS_N_INSNS (1)
10263 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10264 speed_p)
10265 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10266 speed_p));
10267 if (speed_p)
10268 *cost += extra_cost->alu.arith;
10270 return true;
10272 *cost = COSTS_N_INSNS (1);
10273 if (speed_p)
10274 *cost += extra_cost->alu.arith;
10275 return false;
10278 if (GET_MODE_CLASS (mode) == MODE_INT
10279 && GET_MODE_SIZE (mode) < 4)
10281 /* Slightly disparage, as we might need an extend operation. */
10282 *cost = 1 + COSTS_N_INSNS (1);
10283 if (speed_p)
10284 *cost += extra_cost->alu.arith;
10285 return false;
10288 if (mode == DImode)
10290 *cost = COSTS_N_INSNS (2);
10291 if (speed_p)
10292 *cost += 2 * extra_cost->alu.arith;
10293 return false;
10296 /* Vector mode? */
10297 *cost = LIBCALL_COST (1);
10298 return false;
10300 case NOT:
10301 if (mode == SImode)
10303 rtx shift_op;
10304 rtx shift_reg = NULL;
10306 *cost = COSTS_N_INSNS (1);
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10309 if (shift_op)
10311 if (shift_reg != NULL)
10313 if (speed_p)
10314 *cost += extra_cost->alu.log_shift_reg;
10315 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10317 else if (speed_p)
10318 *cost += extra_cost->alu.log_shift;
10319 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10320 return true;
10323 if (speed_p)
10324 *cost += extra_cost->alu.logical;
10325 return false;
10327 if (mode == DImode)
10329 *cost = COSTS_N_INSNS (2);
10330 return false;
10333 /* Vector mode? */
10335 *cost += LIBCALL_COST (1);
10336 return false;
10338 case IF_THEN_ELSE:
10340 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10342 *cost = COSTS_N_INSNS (4);
10343 return true;
10345 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10346 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
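/* If neither arm is a plain register (handled just below), then for speed: when a
   conditionalized insn costs as much as an executed one (non_exec_costs_exec) both
   arms are charged, otherwise only the dearer arm, plus the non_exec overhead in
   either case; for size both arms are simply summed. */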
10348 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10349 /* Assume that if one arm of the if_then_else is a register,
10350 that it will be tied with the result and eliminate the
10351 conditional insn. */
10352 if (REG_P (XEXP (x, 1)))
10353 *cost += op2cost;
10354 else if (REG_P (XEXP (x, 2)))
10355 *cost += op1cost;
10356 else
10358 if (speed_p)
10360 if (extra_cost->alu.non_exec_costs_exec)
10361 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10362 else
10363 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10365 else
10366 *cost += op1cost + op2cost;
10369 return true;
10371 case COMPARE:
10372 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10373 *cost = 0;
10374 else
10376 machine_mode op0mode;
10377 /* We'll mostly assume that the cost of a compare is the cost of the
10378 LHS. However, there are some notable exceptions. */
10380 /* Floating point compares are never done as side-effects. */
10381 op0mode = GET_MODE (XEXP (x, 0));
10382 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10383 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10385 *cost = COSTS_N_INSNS (1);
10386 if (speed_p)
10387 *cost += extra_cost->fp[op0mode != SFmode].compare;
10389 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10391 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10392 return true;
10395 return false;
10397 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10399 *cost = LIBCALL_COST (2);
10400 return false;
10403 /* DImode compares normally take two insns. */
10404 if (op0mode == DImode)
10406 *cost = COSTS_N_INSNS (2);
10407 if (speed_p)
10408 *cost += 2 * extra_cost->alu.arith;
10409 return false;
10412 if (op0mode == SImode)
10414 rtx shift_op;
10415 rtx shift_reg;
10417 if (XEXP (x, 1) == const0_rtx
10418 && !(REG_P (XEXP (x, 0))
10419 || (GET_CODE (XEXP (x, 0)) == SUBREG
10420 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10422 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10424 /* Multiply operations that set the flags are often
10425 significantly more expensive. */
10426 if (speed_p
10427 && GET_CODE (XEXP (x, 0)) == MULT
10428 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10429 *cost += extra_cost->mult[0].flag_setting;
10431 if (speed_p
10432 && GET_CODE (XEXP (x, 0)) == PLUS
10433 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10434 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10435 0), 1), mode))
10436 *cost += extra_cost->mult[0].flag_setting;
10437 return true;
10440 shift_reg = NULL;
10441 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10442 if (shift_op != NULL)
10444 *cost = COSTS_N_INSNS (1);
10445 if (shift_reg != NULL)
10447 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10448 if (speed_p)
10449 *cost += extra_cost->alu.arith_shift_reg;
10451 else if (speed_p)
10452 *cost += extra_cost->alu.arith_shift;
10453 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10454 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10455 return true;
10458 *cost = COSTS_N_INSNS (1);
10459 if (speed_p)
10460 *cost += extra_cost->alu.arith;
10461 if (CONST_INT_P (XEXP (x, 1))
10462 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10464 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10465 return true;
10467 return false;
10470 /* Vector mode? */
10472 *cost = LIBCALL_COST (2);
10473 return false;
10475 return true;
10477 case EQ:
10478 case NE:
10479 case LT:
10480 case LE:
10481 case GT:
10482 case GE:
10483 case LTU:
10484 case LEU:
10485 case GEU:
10486 case GTU:
10487 case ORDERED:
10488 case UNORDERED:
10489 case UNEQ:
10490 case UNLE:
10491 case UNLT:
10492 case UNGE:
10493 case UNGT:
10494 case LTGT:
10495 if (outer_code == SET)
10497 /* Is it a store-flag operation? */
10498 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10499 && XEXP (x, 1) == const0_rtx)
10501 /* Thumb also needs an IT insn. */
10502 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10503 return true;
10505 if (XEXP (x, 1) == const0_rtx)
10507 switch (code)
10509 case LT:
10510 /* LSR Rd, Rn, #31. */
10511 *cost = COSTS_N_INSNS (1);
10512 if (speed_p)
10513 *cost += extra_cost->alu.shift;
10514 break;
10516 case EQ:
10517 /* RSBS T1, Rn, #0
10518 ADC Rd, Rn, T1. */
10520 case NE:
10521 /* SUBS T1, Rn, #1
10522 SBC Rd, Rn, T1. */
10523 *cost = COSTS_N_INSNS (2);
10524 break;
10526 case LE:
10527 /* RSBS T1, Rn, Rn, LSR #31
10528 ADC Rd, Rn, T1. */
10529 *cost = COSTS_N_INSNS (2);
10530 if (speed_p)
10531 *cost += extra_cost->alu.arith_shift;
10532 break;
10534 case GT:
10535 /* RSB Rd, Rn, Rn, ASR #1
10536 LSR Rd, Rd, #31. */
10537 *cost = COSTS_N_INSNS (2);
10538 if (speed_p)
10539 *cost += (extra_cost->alu.arith_shift
10540 + extra_cost->alu.shift);
10541 break;
10543 case GE:
10544 /* ASR Rd, Rn, #31
10545 ADD Rd, Rn, #1. */
10546 *cost = COSTS_N_INSNS (2);
10547 if (speed_p)
10548 *cost += extra_cost->alu.shift;
10549 break;
10551 default:
10552 /* Remaining cases are either meaningless or would take
10553 three insns anyway. */
10554 *cost = COSTS_N_INSNS (3);
10555 break;
10557 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10558 return true;
10560 else
10562 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10563 if (CONST_INT_P (XEXP (x, 1))
10564 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10566 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10567 return true;
10570 return false;
10573 /* Not directly inside a set. If it involves the condition code
10574 register it must be the condition for a branch, cond_exec or
10575 I_T_E operation. Since the comparison is performed elsewhere
10576 this is just the control part which has no additional
10577 cost. */
10578 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10579 && XEXP (x, 1) == const0_rtx)
10581 *cost = 0;
10582 return true;
10584 return false;
10586 case ABS:
10587 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10588 && (mode == SFmode || !TARGET_VFP_SINGLE))
10590 *cost = COSTS_N_INSNS (1);
10591 if (speed_p)
10592 *cost += extra_cost->fp[mode != SFmode].neg;
10594 return false;
10596 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10598 *cost = LIBCALL_COST (1);
10599 return false;
10602 if (mode == SImode)
10604 *cost = COSTS_N_INSNS (1);
10605 if (speed_p)
10606 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10607 return false;
10609 /* Vector mode? */
10610 *cost = LIBCALL_COST (1);
10611 return false;
10613 case SIGN_EXTEND:
10614 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10615 && MEM_P (XEXP (x, 0)))
10617 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10619 if (mode == DImode)
10620 *cost += COSTS_N_INSNS (1);
10622 if (!speed_p)
10623 return true;
10625 if (GET_MODE (XEXP (x, 0)) == SImode)
10626 *cost += extra_cost->ldst.load;
10627 else
10628 *cost += extra_cost->ldst.load_sign_extend;
10630 if (mode == DImode)
10631 *cost += extra_cost->alu.shift;
10633 return true;
10636 /* Widening from less than 32-bits requires an extend operation. */
10637 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10639 /* We have SXTB/SXTH. */
10640 *cost = COSTS_N_INSNS (1);
10641 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10642 if (speed_p)
10643 *cost += extra_cost->alu.extend;
10645 else if (GET_MODE (XEXP (x, 0)) != SImode)
10647 /* Needs two shifts. */
10648 *cost = COSTS_N_INSNS (2);
10649 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10650 if (speed_p)
10651 *cost += 2 * extra_cost->alu.shift;
10654 /* Widening beyond 32-bits requires one more insn. */
10655 if (mode == DImode)
10657 *cost += COSTS_N_INSNS (1);
10658 if (speed_p)
10659 *cost += extra_cost->alu.shift;
10662 return true;
10664 case ZERO_EXTEND:
10665 if ((arm_arch4
10666 || GET_MODE (XEXP (x, 0)) == SImode
10667 || GET_MODE (XEXP (x, 0)) == QImode)
10668 && MEM_P (XEXP (x, 0)))
10670 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10672 if (mode == DImode)
10673 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10675 return true;
10678 /* Widening from less than 32-bits requires an extend operation. */
10679 if (GET_MODE (XEXP (x, 0)) == QImode)
10681 /* UXTB can be a shorter instruction in Thumb2, but it might
10682 be slower than the AND Rd, Rn, #255 alternative. When
10683 optimizing for speed it should never be slower to use
10684 AND, and we don't really model 16-bit vs 32-bit insns
10685 here. */
10686 *cost = COSTS_N_INSNS (1);
10687 if (speed_p)
10688 *cost += extra_cost->alu.logical;
10690 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10692 /* We have UXTB/UXTH. */
10693 *cost = COSTS_N_INSNS (1);
10694 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10695 if (speed_p)
10696 *cost += extra_cost->alu.extend;
10698 else if (GET_MODE (XEXP (x, 0)) != SImode)
10700 /* Needs two shifts. It's marginally preferable to use
10701 shifts rather than two BIC instructions as the second
10702 shift may merge with a subsequent insn as a shifter
10703 op. */
10704 *cost = COSTS_N_INSNS (2);
10705 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10706 if (speed_p)
10707 *cost += 2 * extra_cost->alu.shift;
10709 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10710 *cost = COSTS_N_INSNS (1);
10712 /* Widening beyond 32-bits requires one more insn. */
10713 if (mode == DImode)
10715 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10718 return true;
10720 case CONST_INT:
10721 *cost = 0;
10722 /* CONST_INT has no mode, so we cannot tell for sure how many
10723 insns are really going to be needed. The best we can do is
10724 look at the value passed. If it fits in SImode, then assume
10725 that's the mode it will be used for. Otherwise assume it
10726 will be used in DImode. */
10727 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10728 mode = SImode;
10729 else
10730 mode = DImode;
10732 /* Avoid blowing up in arm_gen_constant (). */
10733 if (!(outer_code == PLUS
10734 || outer_code == AND
10735 || outer_code == IOR
10736 || outer_code == XOR
10737 || outer_code == MINUS))
10738 outer_code = SET;
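/* arm_gen_constant only knows how to synthesize constants for the codes listed above;
   in any other context cost the value as a plain constant load. */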
10740 const_int_cost:
10741 if (mode == SImode)
10743 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10744 INTVAL (x), NULL, NULL,
10745 0, 0));
10746 /* Extra costs? */
10748 else
10750 *cost += COSTS_N_INSNS (arm_gen_constant
10751 (outer_code, SImode, NULL,
10752 trunc_int_for_mode (INTVAL (x), SImode),
10753 NULL, NULL, 0, 0)
10754 + arm_gen_constant (outer_code, SImode, NULL,
10755 INTVAL (x) >> 32, NULL,
10756 NULL, 0, 0));
10757 /* Extra costs? */
10760 return true;
10762 case CONST:
10763 case LABEL_REF:
10764 case SYMBOL_REF:
10765 if (speed_p)
10767 if (arm_arch_thumb2 && !flag_pic)
10768 *cost = COSTS_N_INSNS (2);
10769 else
10770 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10772 else
10773 *cost = COSTS_N_INSNS (2);
10775 if (flag_pic)
10777 *cost += COSTS_N_INSNS (1);
10778 if (speed_p)
10779 *cost += extra_cost->alu.arith;
10782 return true;
10784 case CONST_FIXED:
10785 *cost = COSTS_N_INSNS (4);
10786 /* Fixme. */
10787 return true;
10789 case CONST_DOUBLE:
10790 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10791 && (mode == SFmode || !TARGET_VFP_SINGLE))
10793 if (vfp3_const_double_rtx (x))
10795 *cost = COSTS_N_INSNS (1);
10796 if (speed_p)
10797 *cost += extra_cost->fp[mode == DFmode].fpconst;
10798 return true;
10801 if (speed_p)
10803 *cost = COSTS_N_INSNS (1);
10804 if (mode == DFmode)
10805 *cost += extra_cost->ldst.loadd;
10806 else
10807 *cost += extra_cost->ldst.loadf;
10809 else
10810 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10812 return true;
10814 *cost = COSTS_N_INSNS (4);
10815 return true;
10817 case CONST_VECTOR:
10818 /* Fixme. */
10819 if (TARGET_NEON
10820 && TARGET_HARD_FLOAT
10821 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10822 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10823 *cost = COSTS_N_INSNS (1);
10824 else
10825 *cost = COSTS_N_INSNS (4);
10826 return true;
10828 case HIGH:
10829 case LO_SUM:
10830 *cost = COSTS_N_INSNS (1);
10831 /* When optimizing for size, we prefer constant pool entries to
10832 MOVW/MOVT pairs, so bump the cost of these slightly. */
10833 if (!speed_p)
10834 *cost += 1;
10835 return true;
10837 case CLZ:
10838 *cost = COSTS_N_INSNS (1);
10839 if (speed_p)
10840 *cost += extra_cost->alu.clz;
10841 return false;
10843 case SMIN:
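/* SMIN (x, 0) can presumably be matched by the single "AND rD, rN, rN, ASR #31"
   idiom (hence the log_shift cost below); the general min/max forms fall through to
   a two-insn compare-plus-predicated-move estimate. */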
10844 if (XEXP (x, 1) == const0_rtx)
10846 *cost = COSTS_N_INSNS (1);
10847 if (speed_p)
10848 *cost += extra_cost->alu.log_shift;
10849 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10850 return true;
10852 /* Fall through. */
10853 case SMAX:
10854 case UMIN:
10855 case UMAX:
10856 *cost = COSTS_N_INSNS (2);
10857 return false;
10859 case TRUNCATE:
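/* The pattern matched below is the high half of a widening 32x32->64 multiply,
   which a single SMULL (sign_extend operands) or UMULL (zero_extend operands)
   provides directly. */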
10860 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10861 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10862 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10864 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10865 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10866 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10867 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10868 == ZERO_EXTEND))))
10870 *cost = COSTS_N_INSNS (1);
10871 if (speed_p)
10872 *cost += extra_cost->mult[1].extend;
10873 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10874 speed_p)
10875 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10876 0, speed_p));
10877 return true;
10879 *cost = LIBCALL_COST (1);
10880 return false;
10882 case UNSPEC:
10883 return arm_unspec_cost (x, outer_code, speed_p, cost);
10885 case PC:
10886 /* Reading the PC is like reading any other register. Writing it
10887 is more expensive, but we take that into account elsewhere. */
10888 *cost = 0;
10889 return true;
10891 case ZERO_EXTRACT:
10892 /* TODO: Simple zero_extract of bottom bits using AND. */
10893 /* Fall through. */
10894 case SIGN_EXTRACT:
10895 if (arm_arch6
10896 && mode == SImode
10897 && CONST_INT_P (XEXP (x, 1))
10898 && CONST_INT_P (XEXP (x, 2)))
10900 *cost = COSTS_N_INSNS (1);
10901 if (speed_p)
10902 *cost += extra_cost->alu.bfx;
10903 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10904 return true;
10906 /* Without UBFX/SBFX, need to resort to shift operations. */
10907 *cost = COSTS_N_INSNS (2);
10908 if (speed_p)
10909 *cost += 2 * extra_cost->alu.shift;
10910 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10911 return true;
10913 case FLOAT_EXTEND:
10914 if (TARGET_HARD_FLOAT)
10916 *cost = COSTS_N_INSNS (1);
10917 if (speed_p)
10918 *cost += extra_cost->fp[mode == DFmode].widen;
10919 if (!TARGET_FPU_ARMV8
10920 && GET_MODE (XEXP (x, 0)) == HFmode)
10922 /* Pre v8, widening HF->DF is a two-step process, first
10923 widening to SFmode. */
10924 *cost += COSTS_N_INSNS (1);
10925 if (speed_p)
10926 *cost += extra_cost->fp[0].widen;
10928 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10929 return true;
10932 *cost = LIBCALL_COST (1);
10933 return false;
10935 case FLOAT_TRUNCATE:
10936 if (TARGET_HARD_FLOAT)
10938 *cost = COSTS_N_INSNS (1);
10939 if (speed_p)
10940 *cost += extra_cost->fp[mode == DFmode].narrow;
10941 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10942 return true;
10943 /* Vector modes? */
10945 *cost = LIBCALL_COST (1);
10946 return false;
10948 case FMA:
10949 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10951 rtx op0 = XEXP (x, 0);
10952 rtx op1 = XEXP (x, 1);
10953 rtx op2 = XEXP (x, 2);
10955 *cost = COSTS_N_INSNS (1);
10957 /* vfms or vfnma. */
10958 if (GET_CODE (op0) == NEG)
10959 op0 = XEXP (op0, 0);
10961 /* vfnms or vfnma. */
10962 if (GET_CODE (op2) == NEG)
10963 op2 = XEXP (op2, 0);
10965 *cost += rtx_cost (op0, FMA, 0, speed_p);
10966 *cost += rtx_cost (op1, FMA, 1, speed_p);
10967 *cost += rtx_cost (op2, FMA, 2, speed_p);
10969 if (speed_p)
10970 *cost += extra_cost->fp[mode == DFmode].fma;
10972 return true;
10975 *cost = LIBCALL_COST (3);
10976 return false;
10978 case FIX:
10979 case UNSIGNED_FIX:
10980 if (TARGET_HARD_FLOAT)
10982 if (GET_MODE_CLASS (mode) == MODE_INT)
10984 *cost = COSTS_N_INSNS (1);
10985 if (speed_p)
10986 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10987 /* Strip off the 'cost' of rounding towards zero. */
10988 if (GET_CODE (XEXP (x, 0)) == FIX)
10989 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10990 else
10991 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10992 /* ??? Increase the cost to deal with transferring from
10993 FP -> CORE registers? */
10994 return true;
10996 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10997 && TARGET_FPU_ARMV8)
10999 *cost = COSTS_N_INSNS (1);
11000 if (speed_p)
11001 *cost += extra_cost->fp[mode == DFmode].roundint;
11002 return false;
11004 /* Vector costs? */
11006 *cost = LIBCALL_COST (1);
11007 return false;
11009 case FLOAT:
11010 case UNSIGNED_FLOAT:
11011 if (TARGET_HARD_FLOAT)
11013 /* ??? Increase the cost to deal with transferring from CORE
11014 -> FP registers? */
11015 *cost = COSTS_N_INSNS (1);
11016 if (speed_p)
11017 *cost += extra_cost->fp[mode == DFmode].fromint;
11018 return false;
11020 *cost = LIBCALL_COST (1);
11021 return false;
11023 case CALL:
11024 *cost = COSTS_N_INSNS (1);
11025 return true;
11027 case ASM_OPERANDS:
11029 /* Just a guess. Guess number of instructions in the asm
11030 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11031 though (see PR60663). */
11032 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11033 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11035 *cost = COSTS_N_INSNS (asm_length + num_operands);
11036 return true;
11038 default:
11039 if (mode != VOIDmode)
11040 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11041 else
11042 *cost = COSTS_N_INSNS (4); /* Who knows? */
11043 return false;
11047 #undef HANDLE_NARROW_SHIFT_ARITH
11049 /* RTX costs. Dispatch to the legacy per-core cost functions or to arm_new_rtx_costs, depending on the tuning in effect. */
11050 static bool
11051 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11052 int *total, bool speed)
11054 bool result;
11056 if (TARGET_OLD_RTX_COSTS
11057 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11059 /* Old way. (Deprecated.) */
11060 if (!speed)
11061 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11062 (enum rtx_code) outer_code, total);
11063 else
11064 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11065 (enum rtx_code) outer_code, total,
11066 speed);
11068 else
11070 /* New way. */
11071 if (current_tune->insn_extra_cost)
11072 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code,
11074 current_tune->insn_extra_cost,
11075 total, speed);
11076 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11077 && current_tune->insn_extra_cost == NULL */
11078 else
11079 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11080 (enum rtx_code) outer_code,
11081 &generic_extra_costs, total, speed);
11084 if (dump_file && (dump_flags & TDF_DETAILS))
11086 print_rtl_single (dump_file, x);
11087 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11088 *total, result ? "final" : "partial");
11090 return result;
11093 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11094 supported on any "slowmul" cores, so it can be ignored. */
11096 static bool
11097 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11098 int *total, bool speed)
11100 machine_mode mode = GET_MODE (x);
11102 if (TARGET_THUMB)
11104 *total = thumb1_rtx_costs (x, code, outer_code);
11105 return true;
11108 switch (code)
11110 case MULT:
11111 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11112 || mode == DImode)
11114 *total = COSTS_N_INSNS (20);
11115 return false;
11118 if (CONST_INT_P (XEXP (x, 1)))
11120 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11121 & (unsigned HOST_WIDE_INT) 0xffffffff);
11122 int cost, const_ok = const_ok_for_arm (i);
11123 int j, booth_unit_size;
11125 /* Tune as appropriate. */
11126 cost = const_ok ? 4 : 8;
11127 booth_unit_size = 2;
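/* Each loop iteration below models one 2-bit Booth step; e.g. a constant with 8
   significant bits adds 4 to the base cost of 4 (or 8 when the constant must first
   be loaded into a register). */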
11128 for (j = 0; i && j < 32; j += booth_unit_size)
11130 i >>= booth_unit_size;
11131 cost++;
11134 *total = COSTS_N_INSNS (cost);
11135 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11136 return true;
11139 *total = COSTS_N_INSNS (20);
11140 return false;
11142 default:
11143 return arm_rtx_costs_1 (x, outer_code, total, speed);
11148 /* RTX cost for cores with a fast multiply unit (M variants). */
11150 static bool
11151 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11152 int *total, bool speed)
11154 machine_mode mode = GET_MODE (x);
11156 if (TARGET_THUMB1)
11158 *total = thumb1_rtx_costs (x, code, outer_code);
11159 return true;
11162 /* ??? should thumb2 use different costs? */
11163 switch (code)
11165 case MULT:
11166 /* There is no point basing this on the tuning, since it is always the
11167 fast variant if it exists at all. */
11168 if (mode == DImode
11169 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11170 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11171 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11173 *total = COSTS_N_INSNS (2);
11174 return false;
11178 if (mode == DImode)
11180 *total = COSTS_N_INSNS (5);
11181 return false;
11184 if (CONST_INT_P (XEXP (x, 1)))
11186 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11187 & (unsigned HOST_WIDE_INT) 0xffffffff);
11188 int cost, const_ok = const_ok_for_arm (i);
11189 int j, booth_unit_size;
11191 /* Tune as appropriate. */
11192 cost = const_ok ? 4 : 8;
11193 booth_unit_size = 8;
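/* The fast multiplier retires 8 bits per step here, so the loop adds at most 4 to
   the base cost of 4 (or 8 for a constant that is not const_ok_for_arm). */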
11194 for (j = 0; i && j < 32; j += booth_unit_size)
11196 i >>= booth_unit_size;
11197 cost++;
11200 *total = COSTS_N_INSNS (cost);
11201 return false;
11204 if (mode == SImode)
11206 *total = COSTS_N_INSNS (4);
11207 return false;
11210 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11212 if (TARGET_HARD_FLOAT
11213 && (mode == SFmode
11214 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11216 *total = COSTS_N_INSNS (1);
11217 return false;
11221 /* Requires a lib call */
11222 *total = COSTS_N_INSNS (20);
11223 return false;
11225 default:
11226 return arm_rtx_costs_1 (x, outer_code, total, speed);
11231 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11232 so it can be ignored. */
11234 static bool
11235 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11236 int *total, bool speed)
11238 machine_mode mode = GET_MODE (x);
11240 if (TARGET_THUMB)
11242 *total = thumb1_rtx_costs (x, code, outer_code);
11243 return true;
11246 switch (code)
11248 case COMPARE:
11249 if (GET_CODE (XEXP (x, 0)) != MULT)
11250 return arm_rtx_costs_1 (x, outer_code, total, speed);
11252 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11253 will stall until the multiplication is complete. */
11254 *total = COSTS_N_INSNS (3);
11255 return false;
11257 case MULT:
11258 /* There is no point basing this on the tuning, since it is always the
11259 fast variant if it exists at all. */
11260 if (mode == DImode
11261 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11262 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11263 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11265 *total = COSTS_N_INSNS (2);
11266 return false;
11270 if (mode == DImode)
11272 *total = COSTS_N_INSNS (5);
11273 return false;
11276 if (CONST_INT_P (XEXP (x, 1)))
11278 /* If operand 1 is a constant we can more accurately
11279 calculate the cost of the multiply. The multiplier can
11280 retire 15 bits on the first cycle and a further 12 on the
11281 second. We do, of course, have to load the constant into
11282 a register first. */
11283 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11284 /* There's a general overhead of one cycle. */
11285 int cost = 1;
11286 unsigned HOST_WIDE_INT masked_const;
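/* Negative constants are costed via their complement; then one extra cycle is charged
   if any bits at or above bit 15 are set, and another if any at or above bit 27 are
   set, matching the 15-then-12 retired bits described above. */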
11288 if (i & 0x80000000)
11289 i = ~i;
11291 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11293 masked_const = i & 0xffff8000;
11294 if (masked_const != 0)
11296 cost++;
11297 masked_const = i & 0xf8000000;
11298 if (masked_const != 0)
11299 cost++;
11301 *total = COSTS_N_INSNS (cost);
11302 return false;
11305 if (mode == SImode)
11307 *total = COSTS_N_INSNS (3);
11308 return false;
11311 /* Requires a lib call */
11312 *total = COSTS_N_INSNS (20);
11313 return false;
11315 default:
11316 return arm_rtx_costs_1 (x, outer_code, total, speed);
11321 /* RTX costs for 9e (and later) cores. */
11323 static bool
11324 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11325 int *total, bool speed)
11327 machine_mode mode = GET_MODE (x);
11329 if (TARGET_THUMB1)
11331 switch (code)
11333 case MULT:
11334 /* Small multiply: 32 cycles for an integer multiply inst. */
11335 if (arm_arch6m && arm_m_profile_small_mul)
11336 *total = COSTS_N_INSNS (32);
11337 else
11338 *total = COSTS_N_INSNS (3);
11339 return true;
11341 default:
11342 *total = thumb1_rtx_costs (x, code, outer_code);
11343 return true;
11347 switch (code)
11349 case MULT:
11350 /* There is no point basing this on the tuning, since it is always the
11351 fast variant if it exists at all. */
11352 if (mode == DImode
11353 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11354 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11355 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11357 *total = COSTS_N_INSNS (2);
11358 return false;
11362 if (mode == DImode)
11364 *total = COSTS_N_INSNS (5);
11365 return false;
11368 if (mode == SImode)
11370 *total = COSTS_N_INSNS (2);
11371 return false;
11374 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11376 if (TARGET_HARD_FLOAT
11377 && (mode == SFmode
11378 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11380 *total = COSTS_N_INSNS (1);
11381 return false;
11385 *total = COSTS_N_INSNS (20);
11386 return false;
11388 default:
11389 return arm_rtx_costs_1 (x, outer_code, total, speed);
11392 /* All address computations that can be done are free, but rtx cost returns
11393 the same for practically all of them. So we weight the different types
11394 of address here in the order (most pref first):
11395 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11396 static inline int
11397 arm_arm_address_cost (rtx x)
11399 enum rtx_code c = GET_CODE (x);
11401 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11402 return 0;
11403 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11404 return 10;
11406 if (c == PLUS)
11408 if (CONST_INT_P (XEXP (x, 1)))
11409 return 2;
11411 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11412 return 3;
11414 return 4;
11417 return 6;
11420 static inline int
11421 arm_thumb_address_cost (rtx x)
11423 enum rtx_code c = GET_CODE (x);
11425 if (c == REG)
11426 return 1;
11427 if (c == PLUS
11428 && REG_P (XEXP (x, 0))
11429 && CONST_INT_P (XEXP (x, 1)))
11430 return 1;
11432 return 2;
11435 static int
11436 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11437 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11439 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11442 /* Adjust cost hook for XScale. */
11443 static bool
11444 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11446 /* Some true dependencies can have a higher cost depending
11447 on precisely how certain input operands are used. */
11448 if (REG_NOTE_KIND(link) == 0
11449 && recog_memoized (insn) >= 0
11450 && recog_memoized (dep) >= 0)
11452 int shift_opnum = get_attr_shift (insn);
11453 enum attr_type attr_type = get_attr_type (dep);
11455 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11456 operand for INSN. If we have a shifted input operand and the
11457 instruction we depend on is another ALU instruction, then we may
11458 have to account for an additional stall. */
11459 if (shift_opnum != 0
11460 && (attr_type == TYPE_ALU_SHIFT_IMM
11461 || attr_type == TYPE_ALUS_SHIFT_IMM
11462 || attr_type == TYPE_LOGIC_SHIFT_IMM
11463 || attr_type == TYPE_LOGICS_SHIFT_IMM
11464 || attr_type == TYPE_ALU_SHIFT_REG
11465 || attr_type == TYPE_ALUS_SHIFT_REG
11466 || attr_type == TYPE_LOGIC_SHIFT_REG
11467 || attr_type == TYPE_LOGICS_SHIFT_REG
11468 || attr_type == TYPE_MOV_SHIFT
11469 || attr_type == TYPE_MVN_SHIFT
11470 || attr_type == TYPE_MOV_SHIFT_REG
11471 || attr_type == TYPE_MVN_SHIFT_REG))
11473 rtx shifted_operand;
11474 int opno;
11476 /* Get the shifted operand. */
11477 extract_insn (insn);
11478 shifted_operand = recog_data.operand[shift_opnum];
11480 /* Iterate over all the operands in DEP. If we write an operand
11481 that overlaps with SHIFTED_OPERAND, then we have to increase the
11482 cost of this dependency. */
11483 extract_insn (dep);
11484 preprocess_constraints (dep);
11485 for (opno = 0; opno < recog_data.n_operands; opno++)
11487 /* We can ignore strict inputs. */
11488 if (recog_data.operand_type[opno] == OP_IN)
11489 continue;
11491 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11492 shifted_operand))
11494 *cost = 2;
11495 return false;
11500 return true;
11503 /* Adjust cost hook for Cortex A9. */
11504 static bool
11505 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11507 switch (REG_NOTE_KIND (link))
11509 case REG_DEP_ANTI:
11510 *cost = 0;
11511 return false;
11513 case REG_DEP_TRUE:
11514 case REG_DEP_OUTPUT:
11515 if (recog_memoized (insn) >= 0
11516 && recog_memoized (dep) >= 0)
11518 if (GET_CODE (PATTERN (insn)) == SET)
11520 if (GET_MODE_CLASS
11521 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11522 || GET_MODE_CLASS
11523 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11525 enum attr_type attr_type_insn = get_attr_type (insn);
11526 enum attr_type attr_type_dep = get_attr_type (dep);
11528 /* By default all dependencies of the form
11529 s0 = s0 <op> s1
11530 s0 = s0 <op> s2
11531 have an extra latency of 1 cycle because
11532 of the input and output dependency in this
11533 case. However this gets modeled as a true
11534 dependency and hence all these checks. */
11535 if (REG_P (SET_DEST (PATTERN (insn)))
11536 && REG_P (SET_DEST (PATTERN (dep)))
11537 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11538 SET_DEST (PATTERN (dep))))
11540 /* FMACS is a special case where the dependent
11541 instruction can be issued 3 cycles earlier than
11542 its normal latency in the case of an output
11543 dependency. */
11544 if ((attr_type_insn == TYPE_FMACS
11545 || attr_type_insn == TYPE_FMACD)
11546 && (attr_type_dep == TYPE_FMACS
11547 || attr_type_dep == TYPE_FMACD))
11549 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11550 *cost = insn_default_latency (dep) - 3;
11551 else
11552 *cost = insn_default_latency (dep);
11553 return false;
11555 else
11557 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11558 *cost = insn_default_latency (dep) + 1;
11559 else
11560 *cost = insn_default_latency (dep);
11562 return false;
11567 break;
11569 default:
11570 gcc_unreachable ();
11573 return true;
11576 /* Adjust cost hook for FA726TE. */
11577 static bool
11578 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11580 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn followed
11581 by a predicated one) has a penalty of 3. */
11582 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11583 && recog_memoized (insn) >= 0
11584 && recog_memoized (dep) >= 0
11585 && get_attr_conds (dep) == CONDS_SET)
11587 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11588 if (get_attr_conds (insn) == CONDS_USE
11589 && get_attr_type (insn) != TYPE_BRANCH)
11591 *cost = 3;
11592 return false;
11595 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11596 || get_attr_conds (insn) == CONDS_USE)
11598 *cost = 0;
11599 return false;
11603 return true;
11606 /* Implement TARGET_REGISTER_MOVE_COST.
11608 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11609 such a move is typically more expensive than a single memory access. We set
11610 the cost to less than two memory accesses so that floating
11611 point to integer conversion does not go through memory. */
11614 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11615 reg_class_t from, reg_class_t to)
11617 if (TARGET_32BIT)
11619 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11620 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11621 return 15;
11622 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11623 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11624 return 4;
11625 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11626 return 20;
11627 else
11628 return 2;
11630 else
11632 if (from == HI_REGS || to == HI_REGS)
11633 return 4;
11634 else
11635 return 2;
11639 /* Implement TARGET_MEMORY_MOVE_COST. */
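/* E.g. under TARGET_32BIT every memory move is costed at 10; otherwise (Thumb-1) a
   sub-word move costs 8, and an SImode move costs 8 via LO_REGS or 16 for any other
   class. */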
11642 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11643 bool in ATTRIBUTE_UNUSED)
11645 if (TARGET_32BIT)
11646 return 10;
11647 else
11649 if (GET_MODE_SIZE (mode) < 4)
11650 return 8;
11651 else
11652 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11656 /* Vectorizer cost model implementation. */
11658 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11659 static int
11660 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11661 tree vectype,
11662 int misalign ATTRIBUTE_UNUSED)
11664 unsigned elements;
11666 switch (type_of_cost)
11668 case scalar_stmt:
11669 return current_tune->vec_costs->scalar_stmt_cost;
11671 case scalar_load:
11672 return current_tune->vec_costs->scalar_load_cost;
11674 case scalar_store:
11675 return current_tune->vec_costs->scalar_store_cost;
11677 case vector_stmt:
11678 return current_tune->vec_costs->vec_stmt_cost;
11680 case vector_load:
11681 return current_tune->vec_costs->vec_align_load_cost;
11683 case vector_store:
11684 return current_tune->vec_costs->vec_store_cost;
11686 case vec_to_scalar:
11687 return current_tune->vec_costs->vec_to_scalar_cost;
11689 case scalar_to_vec:
11690 return current_tune->vec_costs->scalar_to_vec_cost;
11692 case unaligned_load:
11693 return current_tune->vec_costs->vec_unalign_load_cost;
11695 case unaligned_store:
11696 return current_tune->vec_costs->vec_unalign_store_cost;
11698 case cond_branch_taken:
11699 return current_tune->vec_costs->cond_taken_branch_cost;
11701 case cond_branch_not_taken:
11702 return current_tune->vec_costs->cond_not_taken_branch_cost;
11704 case vec_perm:
11705 case vec_promote_demote:
11706 return current_tune->vec_costs->vec_stmt_cost;
11708 case vec_construct:
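/* A rough build-from-pieces estimate: about one statement per pair of elements,
   plus one. */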
11709 elements = TYPE_VECTOR_SUBPARTS (vectype);
11710 return elements / 2 + 1;
11712 default:
11713 gcc_unreachable ();
11717 /* Implement targetm.vectorize.add_stmt_cost. */
11719 static unsigned
11720 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11721 struct _stmt_vec_info *stmt_info, int misalign,
11722 enum vect_cost_model_location where)
11724 unsigned *cost = (unsigned *) data;
11725 unsigned retval = 0;
11727 if (flag_vect_cost_model)
11729 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11730 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11732 /* Statements in an inner loop relative to the loop being
11733 vectorized are weighted more heavily. The value here is
11734 arbitrary and could potentially be improved with analysis. */
11735 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11736 count *= 50; /* FIXME. */
11738 retval = (unsigned) (count * stmt_cost);
11739 cost[where] += retval;
11742 return retval;
11745 /* Return true if and only if this insn can dual-issue only as older. */
11746 static bool
11747 cortexa7_older_only (rtx_insn *insn)
11749 if (recog_memoized (insn) < 0)
11750 return false;
11752 switch (get_attr_type (insn))
11754 case TYPE_ALU_DSP_REG:
11755 case TYPE_ALU_SREG:
11756 case TYPE_ALUS_SREG:
11757 case TYPE_LOGIC_REG:
11758 case TYPE_LOGICS_REG:
11759 case TYPE_ADC_REG:
11760 case TYPE_ADCS_REG:
11761 case TYPE_ADR:
11762 case TYPE_BFM:
11763 case TYPE_REV:
11764 case TYPE_MVN_REG:
11765 case TYPE_SHIFT_IMM:
11766 case TYPE_SHIFT_REG:
11767 case TYPE_LOAD_BYTE:
11768 case TYPE_LOAD1:
11769 case TYPE_STORE1:
11770 case TYPE_FFARITHS:
11771 case TYPE_FADDS:
11772 case TYPE_FFARITHD:
11773 case TYPE_FADDD:
11774 case TYPE_FMOV:
11775 case TYPE_F_CVT:
11776 case TYPE_FCMPS:
11777 case TYPE_FCMPD:
11778 case TYPE_FCONSTS:
11779 case TYPE_FCONSTD:
11780 case TYPE_FMULS:
11781 case TYPE_FMACS:
11782 case TYPE_FMULD:
11783 case TYPE_FMACD:
11784 case TYPE_FDIVS:
11785 case TYPE_FDIVD:
11786 case TYPE_F_MRC:
11787 case TYPE_F_MRRC:
11788 case TYPE_F_FLAG:
11789 case TYPE_F_LOADS:
11790 case TYPE_F_STORES:
11791 return true;
11792 default:
11793 return false;
11797 /* Return true if and only if this insn can dual-issue as younger. */
11798 static bool
11799 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11801 if (recog_memoized (insn) < 0)
11803 if (verbose > 5)
11804 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11805 return false;
11808 switch (get_attr_type (insn))
11810 case TYPE_ALU_IMM:
11811 case TYPE_ALUS_IMM:
11812 case TYPE_LOGIC_IMM:
11813 case TYPE_LOGICS_IMM:
11814 case TYPE_EXTEND:
11815 case TYPE_MVN_IMM:
11816 case TYPE_MOV_IMM:
11817 case TYPE_MOV_REG:
11818 case TYPE_MOV_SHIFT:
11819 case TYPE_MOV_SHIFT_REG:
11820 case TYPE_BRANCH:
11821 case TYPE_CALL:
11822 return true;
11823 default:
11824 return false;
11829 /* Look for an instruction that can dual issue only as an older
11830 instruction, and move it in front of any instructions that can
11831 dual-issue as younger, while preserving the relative order of all
11832 other instructions in the ready list. This is a heuristic to help
11833 dual-issue in later cycles, by postponing issue of more flexible
11834 instructions. This heuristic may affect dual issue opportunities
11835 in the current cycle. */
11836 static void
11837 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11838 int *n_readyp, int clock)
11840 int i;
11841 int first_older_only = -1, first_younger = -1;
11843 if (verbose > 5)
11844 fprintf (file,
11845 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11846 clock,
11847 *n_readyp);
11849 /* Traverse the ready list from the head (the instruction to issue
11850 first), looking for the first instruction that can issue as
11851 younger and the first instruction that can dual-issue only as
11852 older. */
11853 for (i = *n_readyp - 1; i >= 0; i--)
11855 rtx_insn *insn = ready[i];
11856 if (cortexa7_older_only (insn))
11858 first_older_only = i;
11859 if (verbose > 5)
11860 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11861 break;
11863 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11864 first_younger = i;
11867 /* Nothing to reorder because either no younger insn was found or an insn
11868 that can dual-issue only as older appears before any insn that
11869 can dual-issue as younger. */
11870 if (first_younger == -1)
11872 if (verbose > 5)
11873 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11874 return;
11877 /* Nothing to reorder because no older-only insn in the ready list. */
11878 if (first_older_only == -1)
11880 if (verbose > 5)
11881 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11882 return;
11885 /* Move first_older_only insn before first_younger. */
11886 if (verbose > 5)
11887 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11888 INSN_UID(ready [first_older_only]),
11889 INSN_UID(ready [first_younger]));
11890 rtx_insn *first_older_only_insn = ready [first_older_only];
11891 for (i = first_older_only; i < first_younger; i++)
11893 ready[i] = ready[i+1];
11896 ready[i] = first_older_only_insn;
11897 return;
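/* A worked trace of the reorder above (illustrative): with a ready list
   { A, B, C } where index 2 (C) is due to issue first, C younger-capable and
   B older-only, the scan finds first_younger == 2 and first_older_only == 1;
   B is then rotated into C's slot, giving { A, C, B }, so the older-only B
   issues ahead of the more flexible C.  */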
11900 /* Implement TARGET_SCHED_REORDER. */
11901 static int
11902 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11903 int clock)
11905 switch (arm_tune)
11907 case cortexa7:
11908 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11909 break;
11910 default:
11911 /* Do nothing for other cores. */
11912 break;
11915 return arm_issue_rate ();
11918 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11919 It corrects the value of COST based on the relationship between
11920 INSN and DEP through the dependence LINK. It returns the new
11921 value. There is a per-core adjust_cost hook to adjust scheduler costs
11922 and the per-core hook can choose to completely override the generic
11923 adjust_cost function. Only put bits of code into arm_adjust_cost that
11924 are common across all cores. */
11925 static int
11926 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11928 rtx i_pat, d_pat;
11930 /* When generating Thumb-1 code, we want to place flag-setting operations
11931 close to a conditional branch which depends on them, so that we can
11932 omit the comparison. */
11933 if (TARGET_THUMB1
11934 && REG_NOTE_KIND (link) == 0
11935 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11936 && recog_memoized (dep) >= 0
11937 && get_attr_conds (dep) == CONDS_SET)
11938 return 0;
11940 if (current_tune->sched_adjust_cost != NULL)
11942 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11943 return cost;
11946 /* XXX Is this strictly true? */
11947 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11948 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11949 return 0;
11951 /* Call insns don't incur a stall, even if they follow a load. */
11952 if (REG_NOTE_KIND (link) == 0
11953 && CALL_P (insn))
11954 return 1;
11956 if ((i_pat = single_set (insn)) != NULL
11957 && MEM_P (SET_SRC (i_pat))
11958 && (d_pat = single_set (dep)) != NULL
11959 && MEM_P (SET_DEST (d_pat)))
11961 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11962 /* This is a load after a store, there is no conflict if the load reads
11963 from a cached area. Assume that loads from the stack, and from the
11964 constant pool are cached, and that others will miss. This is a
11965 hack. */
11967 if ((GET_CODE (src_mem) == SYMBOL_REF
11968 && CONSTANT_POOL_ADDRESS_P (src_mem))
11969 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11970 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11971 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11972 return 1;
11975 return cost;
11979 arm_max_conditional_execute (void)
11981 return max_insns_skipped;
11984 static int
11985 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11987 if (TARGET_32BIT)
11988 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11989 else
11990 return (optimize > 0) ? 2 : 0;
11993 static int
11994 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11996 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11999 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12000 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12001 sequences of non-executed instructions in IT blocks probably take the same
12002 amount of time as executed instructions (and the IT instruction itself takes
12003 space in icache). This function was experimentally determined to give good
12004 results on a popular embedded benchmark. */
12006 static int
12007 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12009 return (TARGET_32BIT && speed_p) ? 1
12010 : arm_default_branch_cost (speed_p, predictable_p);
12013 static bool fp_consts_inited = false;
12015 static REAL_VALUE_TYPE value_fp0;
12017 static void
12018 init_fp_table (void)
12020 REAL_VALUE_TYPE r;
12022 r = REAL_VALUE_ATOF ("0", DFmode);
12023 value_fp0 = r;
12024 fp_consts_inited = true;
12027 /* Return TRUE if rtx X is a valid immediate FP constant. */
12029 arm_const_double_rtx (rtx x)
12031 REAL_VALUE_TYPE r;
12033 if (!fp_consts_inited)
12034 init_fp_table ();
12036 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12037 if (REAL_VALUE_MINUS_ZERO (r))
12038 return 0;
12040 if (REAL_VALUES_EQUAL (r, value_fp0))
12041 return 1;
12043 return 0;
12046 /* VFPv3 has a fairly wide range of representable immediates, formed from
12047 "quarter-precision" floating-point values. These can be evaluated using this
12048 formula (with ^ for exponentiation):
12050 -1^s * n * 2^-r
12052 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12053 16 <= n <= 31 and 0 <= r <= 7.
12055 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12057 - A (most-significant) is the sign bit.
12058 - BCD are the exponent (encoded as r XOR 3).
12059 - EFGH are the mantissa (encoded as n - 16).
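/* A minimal standalone sketch of the mapping above (hypothetical helper, not
   part of GCC; it uses host float arithmetic instead of GCC's REAL_VALUE
   machinery).  For example 1.5 == +24 * 2^-4, so the sketch returns
   (0 << 7) | ((4 ^ 3) << 4) | (24 - 16) == 0x78.  */

#include <math.h>

static int
vfp3_quarter_precision_index_sketch (float f)
{
  int sign = signbit (f) ? 1 : 0;
  float a = fabsf (f);

  if (a == 0.0f || isinf (a) || isnan (a))
    return -1;

  /* Search for n and r with a == n * 2^-r, 16 <= n <= 31, 0 <= r <= 7.  */
  for (int r = 0; r <= 7; r++)
    {
      float n = a * (float) (1 << r);
      if (n >= 16.0f && n <= 31.0f && n == (float) (int) n)
        return (sign << 7) | ((r ^ 3) << 4) | ((int) n - 16);
    }

  return -1;
}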
12062 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12063 fconst[sd] instruction, or -1 if X isn't suitable. */
12064 static int
12065 vfp3_const_double_index (rtx x)
12067 REAL_VALUE_TYPE r, m;
12068 int sign, exponent;
12069 unsigned HOST_WIDE_INT mantissa, mant_hi;
12070 unsigned HOST_WIDE_INT mask;
12071 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12072 bool fail;
12074 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12075 return -1;
12077 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12079 /* We can't represent these things, so detect them first. */
12080 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12081 return -1;
12083 /* Extract sign, exponent and mantissa. */
12084 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12085 r = real_value_abs (&r);
12086 exponent = REAL_EXP (&r);
12087 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12088 highest (sign) bit, with a fixed binary point at bit point_pos.
12089 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12090 bits for the mantissa, this may fail (low bits would be lost). */
12091 real_ldexp (&m, &r, point_pos - exponent);
12092 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12093 mantissa = w.elt (0);
12094 mant_hi = w.elt (1);
12096 /* If there are bits set in the low part of the mantissa, we can't
12097 represent this value. */
12098 if (mantissa != 0)
12099 return -1;
12101 /* Now make it so that mantissa contains the most-significant bits, and move
12102 the point_pos to indicate that the least-significant bits have been
12103 discarded. */
12104 point_pos -= HOST_BITS_PER_WIDE_INT;
12105 mantissa = mant_hi;
12107 /* We can permit four significant bits of mantissa only, plus a high bit
12108 which is always 1. */
12109 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12110 if ((mantissa & mask) != 0)
12111 return -1;
12113 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12114 mantissa >>= point_pos - 5;
12116 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12117 floating-point immediate zero with Neon using an integer-zero load, but
12118 that case is handled elsewhere.) */
12119 if (mantissa == 0)
12120 return -1;
12122 gcc_assert (mantissa >= 16 && mantissa <= 31);
12124 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12125 normalized significands are in the range [1, 2). (Our mantissa is shifted
12126 left 4 places at this point relative to normalized IEEE754 values). GCC
12127 internally uses [0.5, 1) (see real.c), so the exponent returned from
12128 REAL_EXP must be altered. */
12129 exponent = 5 - exponent;
12131 if (exponent < 0 || exponent > 7)
12132 return -1;
12134 /* Sign, mantissa and exponent are now in the correct form to plug into the
12135 formula described in the comment above. */
12136 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12139 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12141 vfp3_const_double_rtx (rtx x)
12143 if (!TARGET_VFP3)
12144 return 0;
12146 return vfp3_const_double_index (x) != -1;
12149 /* Recognize immediates which can be used in various Neon instructions. Legal
12150 immediates are described by the following table (for VMVN variants, the
12151 bitwise inverse of the constant shown is recognized. In either case, VMOV
12152 is output and the correct instruction to use for a given constant is chosen
12153 by the assembler). The constant shown is replicated across all elements of
12154 the destination vector.
12156 insn elems variant constant (binary)
12157 ---- ----- ------- -----------------
12158 vmov i32 0 00000000 00000000 00000000 abcdefgh
12159 vmov i32 1 00000000 00000000 abcdefgh 00000000
12160 vmov i32 2 00000000 abcdefgh 00000000 00000000
12161 vmov i32 3 abcdefgh 00000000 00000000 00000000
12162 vmov i16 4 00000000 abcdefgh
12163 vmov i16 5 abcdefgh 00000000
12164 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12165 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12166 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12167 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12168 vmvn i16 10 00000000 abcdefgh
12169 vmvn i16 11 abcdefgh 00000000
12170 vmov i32 12 00000000 00000000 abcdefgh 11111111
12171 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12172 vmov i32 14 00000000 abcdefgh 11111111 11111111
12173 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12174 vmov i8 16 abcdefgh
12175 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12176 eeeeeeee ffffffff gggggggg hhhhhhhh
12177 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12178 vmov f32 19 00000000 00000000 00000000 00000000
12180 For case 18, B = !b. Representable values are exactly those accepted by
12181 vfp3_const_double_index, but are output as floating-point numbers rather
12182 than indices.
12184 For case 19, we will change it to vmov.i32 when assembling.
12186 Variants 0-5 (inclusive) may also be used as immediates for the second
12187 operand of VORR/VBIC instructions.
12189 The INVERSE argument causes the bitwise inverse of the given operand to be
12190 recognized instead (used for recognizing legal immediates for the VAND/VORN
12191 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12192 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12193 output, rather than the real insns vbic/vorr).
12195 INVERSE makes no difference to the recognition of float vectors.
12197 The return value is the variant of immediate as shown in the above table, or
12198 -1 if the given value doesn't match any of the listed patterns.
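/* A worked example (illustrative): the V4SImode constant whose elements are
   all 0xab splats to the byte pattern ab 00 00 00 repeated four times, which
   matches variant 0 in the table above.  The function then returns 0, sets
   *ELEMENTWIDTH to 32 and *MODCONST to (const_int 0xab), and a
   "vmov.i32 ..., #0xab" can be emitted.  */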
12200 static int
12201 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12202 rtx *modconst, int *elementwidth)
12204 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12205 matches = 1; \
12206 for (i = 0; i < idx; i += (STRIDE)) \
12207 if (!(TEST)) \
12208 matches = 0; \
12209 if (matches) \
12211 immtype = (CLASS); \
12212 elsize = (ELSIZE); \
12213 break; \
12216 unsigned int i, elsize = 0, idx = 0, n_elts;
12217 unsigned int innersize;
12218 unsigned char bytes[16];
12219 int immtype = -1, matches;
12220 unsigned int invmask = inverse ? 0xff : 0;
12221 bool vector = GET_CODE (op) == CONST_VECTOR;
12223 if (vector)
12225 n_elts = CONST_VECTOR_NUNITS (op);
12226 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12228 else
12230 n_elts = 1;
12231 if (mode == VOIDmode)
12232 mode = DImode;
12233 innersize = GET_MODE_SIZE (mode);
12236 /* Vectors of float constants. */
12237 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12239 rtx el0 = CONST_VECTOR_ELT (op, 0);
12240 REAL_VALUE_TYPE r0;
12242 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12243 return -1;
12245 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12247 for (i = 1; i < n_elts; i++)
12249 rtx elt = CONST_VECTOR_ELT (op, i);
12250 REAL_VALUE_TYPE re;
12252 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12254 if (!REAL_VALUES_EQUAL (r0, re))
12255 return -1;
12258 if (modconst)
12259 *modconst = CONST_VECTOR_ELT (op, 0);
12261 if (elementwidth)
12262 *elementwidth = 0;
12264 if (el0 == CONST0_RTX (GET_MODE (el0)))
12265 return 19;
12266 else
12267 return 18;
12270 /* Splat vector constant out into a byte vector. */
12271 for (i = 0; i < n_elts; i++)
12273 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12274 unsigned HOST_WIDE_INT elpart;
12275 unsigned int part, parts;
12277 if (CONST_INT_P (el))
12279 elpart = INTVAL (el);
12280 parts = 1;
12282 else if (CONST_DOUBLE_P (el))
12284 elpart = CONST_DOUBLE_LOW (el);
12285 parts = 2;
12287 else
12288 gcc_unreachable ();
12290 for (part = 0; part < parts; part++)
12292 unsigned int byte;
12293 for (byte = 0; byte < innersize; byte++)
12295 bytes[idx++] = (elpart & 0xff) ^ invmask;
12296 elpart >>= BITS_PER_UNIT;
12298 if (CONST_DOUBLE_P (el))
12299 elpart = CONST_DOUBLE_HIGH (el);
12303 /* Sanity check. */
12304 gcc_assert (idx == GET_MODE_SIZE (mode));
12308 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12309 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12311 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12312 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12314 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12315 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12317 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12318 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12320 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12322 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12324 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12325 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12327 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12328 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12330 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12331 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12333 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12334 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12336 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12338 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12340 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12341 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12343 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12344 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12346 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12347 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12349 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12350 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12352 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12354 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12355 && bytes[i] == bytes[(i + 8) % idx]);
12357 while (0);
12359 if (immtype == -1)
12360 return -1;
12362 if (elementwidth)
12363 *elementwidth = elsize;
12365 if (modconst)
12367 unsigned HOST_WIDE_INT imm = 0;
12369 /* Un-invert bytes of recognized vector, if necessary. */
12370 if (invmask != 0)
12371 for (i = 0; i < idx; i++)
12372 bytes[i] ^= invmask;
12374 if (immtype == 17)
12376 /* FIXME: Broken on 32-bit H_W_I hosts. */
12377 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12379 for (i = 0; i < 8; i++)
12380 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12381 << (i * BITS_PER_UNIT);
12383 *modconst = GEN_INT (imm);
12385 else
12387 unsigned HOST_WIDE_INT imm = 0;
12389 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12390 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12392 *modconst = GEN_INT (imm);
12396 return immtype;
12397 #undef CHECK
12400 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12401 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12402 float elements), and a modified constant (whatever should be output for a
12403 VMOV) in *MODCONST. */
12406 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12407 rtx *modconst, int *elementwidth)
12409 rtx tmpconst;
12410 int tmpwidth;
12411 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12413 if (retval == -1)
12414 return 0;
12416 if (modconst)
12417 *modconst = tmpconst;
12419 if (elementwidth)
12420 *elementwidth = tmpwidth;
12422 return 1;
12425 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12426 the immediate is valid, write a constant suitable for using as an operand
12427 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12428 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12431 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12432 rtx *modconst, int *elementwidth)
12434 rtx tmpconst;
12435 int tmpwidth;
12436 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12438 if (retval < 0 || retval > 5)
12439 return 0;
12441 if (modconst)
12442 *modconst = tmpconst;
12444 if (elementwidth)
12445 *elementwidth = tmpwidth;
12447 return 1;
12450 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12451 the immediate is valid, write a constant suitable for using as an operand
12452 to VSHR/VSHL to *MODCONST and the corresponding element width to
12453 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12454 shift, since the two have different limitations.
12457 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12458 rtx *modconst, int *elementwidth,
12459 bool isleftshift)
12461 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12462 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12463 unsigned HOST_WIDE_INT last_elt = 0;
12464 unsigned HOST_WIDE_INT maxshift;
12466 /* Split vector constant out into a byte vector. */
12467 for (i = 0; i < n_elts; i++)
12469 rtx el = CONST_VECTOR_ELT (op, i);
12470 unsigned HOST_WIDE_INT elpart;
12472 if (CONST_INT_P (el))
12473 elpart = INTVAL (el);
12474 else if (CONST_DOUBLE_P (el))
12475 return 0;
12476 else
12477 gcc_unreachable ();
12479 if (i != 0 && elpart != last_elt)
12480 return 0;
12482 last_elt = elpart;
12485 /* Shift less than element size. */
12486 maxshift = innersize * 8;
12488 if (isleftshift)
12490 /* Left shift immediate value can be from 0 to <size>-1. */
12491 if (last_elt >= maxshift)
12492 return 0;
12494 else
12496 /* Right shift immediate value can be from 1 to <size>. */
12497 if (last_elt == 0 || last_elt > maxshift)
12498 return 0;
12501 if (elementwidth)
12502 *elementwidth = innersize * 8;
12504 if (modconst)
12505 *modconst = CONST_VECTOR_ELT (op, 0);
12507 return 1;
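/* For example (illustrative), with a V8QImode operand every element must hold
   the same shift count; the function above accepts counts 0-7 for VSHL and
   1-8 for VSHR, and sets *ELEMENTWIDTH to 8 in either case.  */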
12510 /* Return a string suitable for output of Neon immediate logic operation
12511 MNEM. */
12513 char *
12514 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12515 int inverse, int quad)
12517 int width, is_valid;
12518 static char templ[40];
12520 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12522 gcc_assert (is_valid != 0);
12524 if (quad)
12525 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12526 else
12527 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12529 return templ;
12532 /* Return a string suitable for output of Neon immediate shift operation
12533 (VSHR or VSHL) MNEM. */
12535 char *
12536 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12537 machine_mode mode, int quad,
12538 bool isleftshift)
12540 int width, is_valid;
12541 static char templ[40];
12543 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12544 gcc_assert (is_valid != 0);
12546 if (quad)
12547 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12548 else
12549 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12551 return templ;
12554 /* Output a sequence of pairwise operations to implement a reduction.
12555 NOTE: We do "too much work" here, because pairwise operations work on two
12556 registers-worth of operands in one go. Unfortunately it does not seem possible
12557 to exploit those extra calculations to do the full operation in fewer steps.
12558 Although all vector elements of the result but the first are ignored, we
12559 actually calculate the same result in each of the elements. An alternative
12560 such as initially loading a vector with zero to use as each of the second
12561 operands would use up an additional register and take an extra instruction,
12562 for no particular gain. */
12564 void
12565 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12566 rtx (*reduc) (rtx, rtx, rtx))
12568 machine_mode inner = GET_MODE_INNER (mode);
12569 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12570 rtx tmpsum = op1;
12572 for (i = parts / 2; i >= 1; i /= 2)
12574 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12575 emit_insn (reduc (dest, tmpsum, tmpsum));
12576 tmpsum = dest;
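/* A worked trace (illustrative): for a four-element mode such as V4HImode
   with a vpadd-style REDUC, parts == 4, so the loop above runs twice.  The
   i == 2 step adds neighbouring pairs into a fresh register, and the i == 1
   step adds those partial sums into OP0, after which every lane of OP0 (in
   particular lane 0) holds the sum of all four elements of OP1.  */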
12580 /* If VALS is a vector constant that can be loaded into a register
12581 using VDUP, generate instructions to do so and return an RTX to
12582 assign to the register. Otherwise return NULL_RTX. */
12584 static rtx
12585 neon_vdup_constant (rtx vals)
12587 machine_mode mode = GET_MODE (vals);
12588 machine_mode inner_mode = GET_MODE_INNER (mode);
12589 int n_elts = GET_MODE_NUNITS (mode);
12590 bool all_same = true;
12591 rtx x;
12592 int i;
12594 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12595 return NULL_RTX;
12597 for (i = 0; i < n_elts; ++i)
12599 x = XVECEXP (vals, 0, i);
12600 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12601 all_same = false;
12604 if (!all_same)
12605 /* The elements are not all the same. We could handle repeating
12606 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12607 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12608 vdup.i16). */
12609 return NULL_RTX;
12611 /* We can load this constant by using VDUP and a constant in a
12612 single ARM register. This will be cheaper than a vector
12613 load. */
12615 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12616 return gen_rtx_VEC_DUPLICATE (mode, x);
12619 /* Generate code to load VALS, which is a PARALLEL containing only
12620 constants (for vec_init) or CONST_VECTOR, efficiently into a
12621 register. Returns an RTX to copy into the register, or NULL_RTX
12622 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12625 neon_make_constant (rtx vals)
12627 machine_mode mode = GET_MODE (vals);
12628 rtx target;
12629 rtx const_vec = NULL_RTX;
12630 int n_elts = GET_MODE_NUNITS (mode);
12631 int n_const = 0;
12632 int i;
12634 if (GET_CODE (vals) == CONST_VECTOR)
12635 const_vec = vals;
12636 else if (GET_CODE (vals) == PARALLEL)
12638 /* A CONST_VECTOR must contain only CONST_INTs and
12639 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12640 Only store valid constants in a CONST_VECTOR. */
12641 for (i = 0; i < n_elts; ++i)
12643 rtx x = XVECEXP (vals, 0, i);
12644 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12645 n_const++;
12647 if (n_const == n_elts)
12648 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12650 else
12651 gcc_unreachable ();
12653 if (const_vec != NULL
12654 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12655 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12656 return const_vec;
12657 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12658 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12659 pipeline cycle; creating the constant takes one or two ARM
12660 pipeline cycles. */
12661 return target;
12662 else if (const_vec != NULL_RTX)
12663 /* Load from constant pool. On Cortex-A8 this takes two cycles
12664 (for either double or quad vectors). We can not take advantage
12665 of single-cycle VLD1 because we need a PC-relative addressing
12666 mode. */
12667 return const_vec;
12668 else
12669 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12670 We can not construct an initializer. */
12671 return NULL_RTX;
12674 /* Initialize vector TARGET to VALS. */
12676 void
12677 neon_expand_vector_init (rtx target, rtx vals)
12679 machine_mode mode = GET_MODE (target);
12680 machine_mode inner_mode = GET_MODE_INNER (mode);
12681 int n_elts = GET_MODE_NUNITS (mode);
12682 int n_var = 0, one_var = -1;
12683 bool all_same = true;
12684 rtx x, mem;
12685 int i;
12687 for (i = 0; i < n_elts; ++i)
12689 x = XVECEXP (vals, 0, i);
12690 if (!CONSTANT_P (x))
12691 ++n_var, one_var = i;
12693 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12694 all_same = false;
12697 if (n_var == 0)
12699 rtx constant = neon_make_constant (vals);
12700 if (constant != NULL_RTX)
12702 emit_move_insn (target, constant);
12703 return;
12707 /* Splat a single non-constant element if we can. */
12708 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12710 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12711 emit_insn (gen_rtx_SET (VOIDmode, target,
12712 gen_rtx_VEC_DUPLICATE (mode, x)));
12713 return;
12716 /* One field is non-constant. Load constant then overwrite varying
12717 field. This is more efficient than using the stack. */
12718 if (n_var == 1)
12720 rtx copy = copy_rtx (vals);
12721 rtx index = GEN_INT (one_var);
12723 /* Load constant part of vector, substitute neighboring value for
12724 varying element. */
12725 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12726 neon_expand_vector_init (target, copy);
12728 /* Insert variable. */
12729 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12730 switch (mode)
12732 case V8QImode:
12733 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12734 break;
12735 case V16QImode:
12736 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12737 break;
12738 case V4HImode:
12739 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12740 break;
12741 case V8HImode:
12742 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12743 break;
12744 case V2SImode:
12745 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12746 break;
12747 case V4SImode:
12748 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12749 break;
12750 case V2SFmode:
12751 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12752 break;
12753 case V4SFmode:
12754 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12755 break;
12756 case V2DImode:
12757 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12758 break;
12759 default:
12760 gcc_unreachable ();
12762 return;
12765 /* Construct the vector in memory one field at a time
12766 and load the whole vector. */
12767 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12768 for (i = 0; i < n_elts; i++)
12769 emit_move_insn (adjust_address_nv (mem, inner_mode,
12770 i * GET_MODE_SIZE (inner_mode)),
12771 XVECEXP (vals, 0, i));
12772 emit_move_insn (target, mem);
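/* A worked example (illustrative): initializing a V4SImode vector to
   { x, 1, 2, 3 } with X in a register takes the N_VAR == 1 path above.  The
   constant { 1, 1, 2, 3 } is loaded first (the varying lane is temporarily
   filled with its neighbour's value), and gen_neon_vset_lanev4si then
   replaces lane 0 with X.  */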
12775 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12776 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12777 reported source locations are bogus. */
12779 static void
12780 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12781 const char *err)
12783 HOST_WIDE_INT lane;
12785 gcc_assert (CONST_INT_P (operand));
12787 lane = INTVAL (operand);
12789 if (lane < low || lane >= high)
12790 error (err);
12793 /* Bounds-check lanes. */
12795 void
12796 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12798 bounds_check (operand, low, high, "lane out of range");
12801 /* Bounds-check constants. */
12803 void
12804 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12806 bounds_check (operand, low, high, "constant out of range");
12809 HOST_WIDE_INT
12810 neon_element_bits (machine_mode mode)
12812 if (mode == DImode)
12813 return GET_MODE_BITSIZE (mode);
12814 else
12815 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12819 /* Predicates for `match_operand' and `match_operator'. */
12821 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12822 WB is true if full writeback address modes are allowed and is false
12823 if limited writeback address modes (POST_INC and PRE_DEC) are
12824 allowed. */
12827 arm_coproc_mem_operand (rtx op, bool wb)
12829 rtx ind;
12831 /* Reject eliminable registers. */
12832 if (! (reload_in_progress || reload_completed || lra_in_progress)
12833 && ( reg_mentioned_p (frame_pointer_rtx, op)
12834 || reg_mentioned_p (arg_pointer_rtx, op)
12835 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12836 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12837 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12838 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12839 return FALSE;
12841 /* Constants are converted into offsets from labels. */
12842 if (!MEM_P (op))
12843 return FALSE;
12845 ind = XEXP (op, 0);
12847 if (reload_completed
12848 && (GET_CODE (ind) == LABEL_REF
12849 || (GET_CODE (ind) == CONST
12850 && GET_CODE (XEXP (ind, 0)) == PLUS
12851 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12852 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12853 return TRUE;
12855 /* Match: (mem (reg)). */
12856 if (REG_P (ind))
12857 return arm_address_register_rtx_p (ind, 0);
12859 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12860 acceptable in any case (subject to verification by
12861 arm_address_register_rtx_p). We need WB to be true to accept
12862 PRE_INC and POST_DEC. */
12863 if (GET_CODE (ind) == POST_INC
12864 || GET_CODE (ind) == PRE_DEC
12865 || (wb
12866 && (GET_CODE (ind) == PRE_INC
12867 || GET_CODE (ind) == POST_DEC)))
12868 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12870 if (wb
12871 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12872 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12873 && GET_CODE (XEXP (ind, 1)) == PLUS
12874 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12875 ind = XEXP (ind, 1);
12877 /* Match:
12878 (plus (reg)
12879 (const)). */
12880 if (GET_CODE (ind) == PLUS
12881 && REG_P (XEXP (ind, 0))
12882 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12883 && CONST_INT_P (XEXP (ind, 1))
12884 && INTVAL (XEXP (ind, 1)) > -1024
12885 && INTVAL (XEXP (ind, 1)) < 1024
12886 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12887 return TRUE;
12889 return FALSE;
12892 /* Return TRUE if OP is a memory operand which we can load or store a vector
12893 to/from. TYPE is one of the following values:
12894 0 - Vector load/store (vldr)
12895 1 - Core registers (ldm)
12896 2 - Element/structure loads (vld1)
12899 neon_vector_mem_operand (rtx op, int type, bool strict)
12901 rtx ind;
12903 /* Reject eliminable registers. */
12904 if (! (reload_in_progress || reload_completed)
12905 && ( reg_mentioned_p (frame_pointer_rtx, op)
12906 || reg_mentioned_p (arg_pointer_rtx, op)
12907 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12908 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12909 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12910 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12911 return !strict;
12913 /* Constants are converted into offsets from labels. */
12914 if (!MEM_P (op))
12915 return FALSE;
12917 ind = XEXP (op, 0);
12919 if (reload_completed
12920 && (GET_CODE (ind) == LABEL_REF
12921 || (GET_CODE (ind) == CONST
12922 && GET_CODE (XEXP (ind, 0)) == PLUS
12923 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12924 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12925 return TRUE;
12927 /* Match: (mem (reg)). */
12928 if (REG_P (ind))
12929 return arm_address_register_rtx_p (ind, 0);
12931 /* Allow post-increment with Neon registers. */
12932 if ((type != 1 && GET_CODE (ind) == POST_INC)
12933 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12934 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12936 /* Allow post-increment by register for VLDn. */
12937 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12938 && GET_CODE (XEXP (ind, 1)) == PLUS
12939 && REG_P (XEXP (XEXP (ind, 1), 1)))
12940 return true;
12942 /* Match:
12943 (plus (reg)
12944 (const)). */
12945 if (type == 0
12946 && GET_CODE (ind) == PLUS
12947 && REG_P (XEXP (ind, 0))
12948 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12949 && CONST_INT_P (XEXP (ind, 1))
12950 && INTVAL (XEXP (ind, 1)) > -1024
12951 /* For quad modes, we restrict the constant offset to be slightly less
12952 than what the instruction format permits. We have no such constraint
12953 on double mode offsets. (This must match arm_legitimate_index_p.) */
12954 && (INTVAL (XEXP (ind, 1))
12955 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12956 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12957 return TRUE;
12959 return FALSE;
12962 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12963 type. */
12965 neon_struct_mem_operand (rtx op)
12967 rtx ind;
12969 /* Reject eliminable registers. */
12970 if (! (reload_in_progress || reload_completed)
12971 && ( reg_mentioned_p (frame_pointer_rtx, op)
12972 || reg_mentioned_p (arg_pointer_rtx, op)
12973 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12974 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12975 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12976 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12977 return FALSE;
12979 /* Constants are converted into offsets from labels. */
12980 if (!MEM_P (op))
12981 return FALSE;
12983 ind = XEXP (op, 0);
12985 if (reload_completed
12986 && (GET_CODE (ind) == LABEL_REF
12987 || (GET_CODE (ind) == CONST
12988 && GET_CODE (XEXP (ind, 0)) == PLUS
12989 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12990 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12991 return TRUE;
12993 /* Match: (mem (reg)). */
12994 if (REG_P (ind))
12995 return arm_address_register_rtx_p (ind, 0);
12997 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12998 if (GET_CODE (ind) == POST_INC
12999 || GET_CODE (ind) == PRE_DEC)
13000 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13002 return FALSE;
13005 /* Return true if X is a register that will be eliminated later on. */
13007 arm_eliminable_register (rtx x)
13009 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13010 || REGNO (x) == ARG_POINTER_REGNUM
13011 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13012 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13015 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13016 coprocessor registers. Otherwise return NO_REGS. */
13018 enum reg_class
13019 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13021 if (mode == HFmode)
13023 if (!TARGET_NEON_FP16)
13024 return GENERAL_REGS;
13025 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13026 return NO_REGS;
13027 return GENERAL_REGS;
13030 /* The neon move patterns handle all legitimate vector and struct
13031 addresses. */
13032 if (TARGET_NEON
13033 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13034 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13035 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13036 || VALID_NEON_STRUCT_MODE (mode)))
13037 return NO_REGS;
13039 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13040 return NO_REGS;
13042 return GENERAL_REGS;
13045 /* Values which must be returned in the most-significant end of the return
13046 register. */
13048 static bool
13049 arm_return_in_msb (const_tree valtype)
13051 return (TARGET_AAPCS_BASED
13052 && BYTES_BIG_ENDIAN
13053 && (AGGREGATE_TYPE_P (valtype)
13054 || TREE_CODE (valtype) == COMPLEX_TYPE
13055 || FIXED_POINT_TYPE_P (valtype)));
13058 /* Return TRUE if X references a SYMBOL_REF. */
13060 symbol_mentioned_p (rtx x)
13062 const char * fmt;
13063 int i;
13065 if (GET_CODE (x) == SYMBOL_REF)
13066 return 1;
13068 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13069 are constant offsets, not symbols. */
13070 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13071 return 0;
13073 fmt = GET_RTX_FORMAT (GET_CODE (x));
13075 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13077 if (fmt[i] == 'E')
13079 int j;
13081 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13082 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13083 return 1;
13085 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13086 return 1;
13089 return 0;
13092 /* Return TRUE if X references a LABEL_REF. */
13094 label_mentioned_p (rtx x)
13096 const char * fmt;
13097 int i;
13099 if (GET_CODE (x) == LABEL_REF)
13100 return 1;
13102 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13103 instruction, but they are constant offsets, not symbols. */
13104 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13105 return 0;
13107 fmt = GET_RTX_FORMAT (GET_CODE (x));
13108 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13110 if (fmt[i] == 'E')
13112 int j;
13114 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13115 if (label_mentioned_p (XVECEXP (x, i, j)))
13116 return 1;
13118 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13119 return 1;
13122 return 0;
13126 tls_mentioned_p (rtx x)
13128 switch (GET_CODE (x))
13130 case CONST:
13131 return tls_mentioned_p (XEXP (x, 0));
13133 case UNSPEC:
13134 if (XINT (x, 1) == UNSPEC_TLS)
13135 return 1;
13137 default:
13138 return 0;
13142 /* Must not copy any rtx that uses a pc-relative address. */
13144 static bool
13145 arm_cannot_copy_insn_p (rtx_insn *insn)
13147 /* The tls call insn cannot be copied, as it is paired with a data
13148 word. */
13149 if (recog_memoized (insn) == CODE_FOR_tlscall)
13150 return true;
13152 subrtx_iterator::array_type array;
13153 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13155 const_rtx x = *iter;
13156 if (GET_CODE (x) == UNSPEC
13157 && (XINT (x, 1) == UNSPEC_PIC_BASE
13158 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13159 return true;
13161 return false;
13164 enum rtx_code
13165 minmax_code (rtx x)
13167 enum rtx_code code = GET_CODE (x);
13169 switch (code)
13171 case SMAX:
13172 return GE;
13173 case SMIN:
13174 return LE;
13175 case UMIN:
13176 return LEU;
13177 case UMAX:
13178 return GEU;
13179 default:
13180 gcc_unreachable ();
13184 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13186 bool
13187 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13188 int *mask, bool *signed_sat)
13190 /* The high bound must be a power of two minus one. */
13191 int log = exact_log2 (INTVAL (hi_bound) + 1);
13192 if (log == -1)
13193 return false;
13195 /* The low bound is either zero (for usat) or one less than the
13196 negation of the high bound (for ssat). */
13197 if (INTVAL (lo_bound) == 0)
13199 if (mask)
13200 *mask = log;
13201 if (signed_sat)
13202 *signed_sat = false;
13204 return true;
13207 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13209 if (mask)
13210 *mask = log + 1;
13211 if (signed_sat)
13212 *signed_sat = true;
13214 return true;
13217 return false;
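/* Worked examples (illustrative): bounds [0, 255] match with *MASK == 8 and
   *SIGNED_SAT == false (a "usat #8" range); bounds [-128, 127] match with
   *MASK == 8 and *SIGNED_SAT == true (an "ssat #8" range); bounds [0, 100]
   do not match because 101 is not a power of two.  */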
13220 /* Return 1 if memory locations are adjacent. */
13222 adjacent_mem_locations (rtx a, rtx b)
13224 /* We don't guarantee to preserve the order of these memory refs. */
13225 if (volatile_refs_p (a) || volatile_refs_p (b))
13226 return 0;
13228 if ((REG_P (XEXP (a, 0))
13229 || (GET_CODE (XEXP (a, 0)) == PLUS
13230 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13231 && (REG_P (XEXP (b, 0))
13232 || (GET_CODE (XEXP (b, 0)) == PLUS
13233 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13235 HOST_WIDE_INT val0 = 0, val1 = 0;
13236 rtx reg0, reg1;
13237 int val_diff;
13239 if (GET_CODE (XEXP (a, 0)) == PLUS)
13241 reg0 = XEXP (XEXP (a, 0), 0);
13242 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13244 else
13245 reg0 = XEXP (a, 0);
13247 if (GET_CODE (XEXP (b, 0)) == PLUS)
13249 reg1 = XEXP (XEXP (b, 0), 0);
13250 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13252 else
13253 reg1 = XEXP (b, 0);
13255 /* Don't accept any offset that will require multiple
13256 instructions to handle, since this would cause the
13257 arith_adjacentmem pattern to output an overlong sequence. */
13258 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13259 return 0;
13261 /* Don't allow an eliminable register: register elimination can make
13262 the offset too large. */
13263 if (arm_eliminable_register (reg0))
13264 return 0;
13266 val_diff = val1 - val0;
13268 if (arm_ld_sched)
13270 /* If the target has load delay slots, then there's no benefit
13271 to using an ldm instruction unless the offset is zero and
13272 we are optimizing for size. */
13273 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13274 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13275 && (val_diff == 4 || val_diff == -4));
13278 return ((REGNO (reg0) == REGNO (reg1))
13279 && (val_diff == 4 || val_diff == -4));
13282 return 0;
13285 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13286 for load operations, false for store operations. CONSECUTIVE is true
13287 if the register numbers in the operation must be consecutive in the register
13288 bank. RETURN_PC is true if the value is to be loaded into the PC.
13289 The pattern we are trying to match for load is:
13290 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13291 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13294 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13296 where
13297 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13298 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13299 3. If consecutive is TRUE, then for kth register being loaded,
13300 REGNO (R_dk) = REGNO (R_d0) + k.
13301 The pattern for store is similar. */
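/* For example (illustrative), a two-register load with no writeback and zero
   offset is described by a pattern of the form

     (parallel [(set (reg:SI 4) (mem:SI (reg:SI 0)))
                (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0)
                                                 (const_int 4))))])

   which is accepted with LOAD true, MODE SImode, CONSECUTIVE false and
   RETURN_PC false (for Thumb-1 the base register would additionally have to
   be written back or appear in the register list).  */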
13302 bool
13303 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13304 bool consecutive, bool return_pc)
13306 HOST_WIDE_INT count = XVECLEN (op, 0);
13307 rtx reg, mem, addr;
13308 unsigned regno;
13309 unsigned first_regno;
13310 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13311 rtx elt;
13312 bool addr_reg_in_reglist = false;
13313 bool update = false;
13314 int reg_increment;
13315 int offset_adj;
13316 int regs_per_val;
13318 /* If not in SImode, then registers must be consecutive
13319 (e.g., VLDM instructions for DFmode). */
13320 gcc_assert ((mode == SImode) || consecutive);
13321 /* Setting return_pc for stores is illegal. */
13322 gcc_assert (!return_pc || load);
13324 /* Set up the increments and the regs per val based on the mode. */
13325 reg_increment = GET_MODE_SIZE (mode);
13326 regs_per_val = reg_increment / 4;
13327 offset_adj = return_pc ? 1 : 0;
13329 if (count <= 1
13330 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13331 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13332 return false;
13334 /* Check if this is a write-back. */
13335 elt = XVECEXP (op, 0, offset_adj);
13336 if (GET_CODE (SET_SRC (elt)) == PLUS)
13338 i++;
13339 base = 1;
13340 update = true;
13342 /* The offset adjustment must be the number of registers being
13343 popped times the size of a single register. */
13344 if (!REG_P (SET_DEST (elt))
13345 || !REG_P (XEXP (SET_SRC (elt), 0))
13346 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13347 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13348 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13349 ((count - 1 - offset_adj) * reg_increment))
13350 return false;
13353 i = i + offset_adj;
13354 base = base + offset_adj;
13355 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13356 success depends on the type: VLDM can do just one reg,
13357 LDM must do at least two. */
13358 if ((count <= i) && (mode == SImode))
13359 return false;
13361 elt = XVECEXP (op, 0, i - 1);
13362 if (GET_CODE (elt) != SET)
13363 return false;
13365 if (load)
13367 reg = SET_DEST (elt);
13368 mem = SET_SRC (elt);
13370 else
13372 reg = SET_SRC (elt);
13373 mem = SET_DEST (elt);
13376 if (!REG_P (reg) || !MEM_P (mem))
13377 return false;
13379 regno = REGNO (reg);
13380 first_regno = regno;
13381 addr = XEXP (mem, 0);
13382 if (GET_CODE (addr) == PLUS)
13384 if (!CONST_INT_P (XEXP (addr, 1)))
13385 return false;
13387 offset = INTVAL (XEXP (addr, 1));
13388 addr = XEXP (addr, 0);
13391 if (!REG_P (addr))
13392 return false;
13394 /* Don't allow SP to be loaded unless it is also the base register. It
13395 guarantees that SP is reset correctly when an LDM instruction
13396 is interrupted. Otherwise, we might end up with a corrupt stack. */
13397 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13398 return false;
13400 for (; i < count; i++)
13402 elt = XVECEXP (op, 0, i);
13403 if (GET_CODE (elt) != SET)
13404 return false;
13406 if (load)
13408 reg = SET_DEST (elt);
13409 mem = SET_SRC (elt);
13411 else
13413 reg = SET_SRC (elt);
13414 mem = SET_DEST (elt);
13417 if (!REG_P (reg)
13418 || GET_MODE (reg) != mode
13419 || REGNO (reg) <= regno
13420 || (consecutive
13421 && (REGNO (reg) !=
13422 (unsigned int) (first_regno + regs_per_val * (i - base))))
13423 /* Don't allow SP to be loaded unless it is also the base register. It
13424 guarantees that SP is reset correctly when an LDM instruction
13425 is interrupted. Otherwise, we might end up with a corrupt stack. */
13426 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13427 || !MEM_P (mem)
13428 || GET_MODE (mem) != mode
13429 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13430 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13431 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13432 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13433 offset + (i - base) * reg_increment))
13434 && (!REG_P (XEXP (mem, 0))
13435 || offset + (i - base) * reg_increment != 0)))
13436 return false;
13438 regno = REGNO (reg);
13439 if (regno == REGNO (addr))
13440 addr_reg_in_reglist = true;
13443 if (load)
13445 if (update && addr_reg_in_reglist)
13446 return false;
13448 /* For Thumb-1, the address register is always modified - either by write-back
13449 or by an explicit load. If the pattern does not describe an update,
13450 then the address register must be in the list of loaded registers. */
13451 if (TARGET_THUMB1)
13452 return update || addr_reg_in_reglist;
13455 return true;
13458 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13459 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13460 instruction. ADD_OFFSET is nonzero if the base address register needs
13461 to be modified with an add instruction before we can use it. */
13463 static bool
13464 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13465 int nops, HOST_WIDE_INT add_offset)
13467 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13468 if the offset isn't small enough. The reason 2 ldrs are faster
13469 is that these ARMs are able to do more than one cache access
13470 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13471 whilst the ARM8 has a double bandwidth cache. This means that
13472 these cores can do both an instruction fetch and a data fetch in
13473 a single cycle, so the trick of calculating the address into a
13474 scratch register (one of the result regs) and then doing a load
13475 multiple actually becomes slower (and no smaller in code size).
13476 That is the transformation
13478 ldr rd1, [rbase + offset]
13479 ldr rd2, [rbase + offset + 4]
13483 add rd1, rbase, offset
13484 ldmia rd1, {rd1, rd2}
13486 produces worse code -- '3 cycles + any stalls on rd2' instead of
13487 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13488 access per cycle, the first sequence could never complete in less
13489 than 6 cycles, whereas the ldm sequence would only take 5 and
13490 would make better use of sequential accesses if not hitting the
13491 cache.
13493 We cheat here and test 'arm_ld_sched' which we currently know to
13494 only be true for the ARM8, ARM9 and StrongARM. If this ever
13495 changes, then the test below needs to be reworked. */
13496 if (nops == 2 && arm_ld_sched && add_offset != 0)
13497 return false;
13499 /* XScale has load-store double instructions, but they have stricter
13500 alignment requirements than load-store multiple, so we cannot
13501 use them.
13503 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13504 the pipeline until completion.
13506 NREGS CYCLES
13507 1 3
13508 2 4
13509 3 5
13510 4 6
13512 An ldr instruction takes 1-3 cycles, but does not block the
13513 pipeline.
13515 NREGS CYCLES
13516 1 1-3
13517 2 2-6
13518 3 3-9
13519 4 4-12
13521 Best case ldr will always win. However, the more ldr instructions
13522 we issue, the less likely we are to be able to schedule them well.
13523 Using ldr instructions also increases code size.
13525 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13526 for counts of 3 or 4 regs. */
13527 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13528 return false;
13529 return true;
13532 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13533 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13534 an array ORDER which describes the sequence in which to access the
13535 offsets so that they are visited in ascending order. In this sequence, each
13536 offset must be larger by exactly 4 than the previous one. ORDER[0]
13537 must have been filled in with the lowest offset by the caller.
13538 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13539 we use to verify that ORDER produces an ascending order of registers.
13540 Return true if it was possible to construct such an order, false if
13541 not. */
13543 static bool
13544 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13545 int *unsorted_regs)
13547 int i;
13548 for (i = 1; i < nops; i++)
13550 int j;
13552 order[i] = order[i - 1];
13553 for (j = 0; j < nops; j++)
13554 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13556 /* We must find exactly one offset that is higher than the
13557 previous one by 4. */
13558 if (order[i] != order[i - 1])
13559 return false;
13560 order[i] = j;
13562 if (order[i] == order[i - 1])
13563 return false;
13564 /* The register numbers must be ascending. */
13565 if (unsorted_regs != NULL
13566 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13567 return false;
13569 return true;
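/* A worked example (illustrative): with UNSORTED_OFFSETS == { 8, 0, 4, 12 }
   and ORDER[0] preset to 1 (the index of the lowest offset), the loop above
   fills ORDER with { 1, 2, 0, 3 }.  Offsets { 0, 5, 10 } fail because no
   offset is exactly 4 greater than its predecessor.  */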
13572 /* Used to determine in a peephole whether a sequence of load
13573 instructions can be changed into a load-multiple instruction.
13574 NOPS is the number of separate load instructions we are examining. The
13575 first NOPS entries in OPERANDS are the destination registers, the
13576 next NOPS entries are memory operands. If this function is
13577 successful, *BASE is set to the common base register of the memory
13578 accesses; *LOAD_OFFSET is set to the first memory location's offset
13579 from that base register.
13580 REGS is an array filled in with the destination register numbers.
13581 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13582 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13583 the sequence of registers in REGS matches the loads from ascending memory
13584 locations, and the function verifies that the register numbers are
13585 themselves ascending. If CHECK_REGS is false, the register numbers
13586 are stored in the order they are found in the operands. */
13587 static int
13588 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13589 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13591 int unsorted_regs[MAX_LDM_STM_OPS];
13592 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13593 int order[MAX_LDM_STM_OPS];
13594 rtx base_reg_rtx = NULL;
13595 int base_reg = -1;
13596 int i, ldm_case;
13598 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13599 easily extended if required. */
13600 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13602 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13604 /* Loop over the operands and check that the memory references are
13605 suitable (i.e. immediate offsets from the same base register). At
13606 the same time, extract the target register, and the memory
13607 offsets. */
13608 for (i = 0; i < nops; i++)
13610 rtx reg;
13611 rtx offset;
13613 /* Convert a subreg of a mem into the mem itself. */
13614 if (GET_CODE (operands[nops + i]) == SUBREG)
13615 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13617 gcc_assert (MEM_P (operands[nops + i]));
13619 /* Don't reorder volatile memory references; it doesn't seem worth
13620 looking for the case where the order is ok anyway. */
13621 if (MEM_VOLATILE_P (operands[nops + i]))
13622 return 0;
13624 offset = const0_rtx;
13626 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13627 || (GET_CODE (reg) == SUBREG
13628 && REG_P (reg = SUBREG_REG (reg))))
13629 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13630 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13631 || (GET_CODE (reg) == SUBREG
13632 && REG_P (reg = SUBREG_REG (reg))))
13633 && (CONST_INT_P (offset
13634 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13636 if (i == 0)
13638 base_reg = REGNO (reg);
13639 base_reg_rtx = reg;
13640 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13641 return 0;
13643 else if (base_reg != (int) REGNO (reg))
13644 /* Not addressed from the same base register. */
13645 return 0;
13647 unsorted_regs[i] = (REG_P (operands[i])
13648 ? REGNO (operands[i])
13649 : REGNO (SUBREG_REG (operands[i])));
13651 /* If it isn't an integer register, or if it overwrites the
13652 base register but isn't the last insn in the list, then
13653 we can't do this. */
13654 if (unsorted_regs[i] < 0
13655 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13656 || unsorted_regs[i] > 14
13657 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13658 return 0;
13660 /* Don't allow SP to be loaded unless it is also the base
13661 register. It guarantees that SP is reset correctly when
13662 an LDM instruction is interrupted. Otherwise, we might
13663 end up with a corrupt stack. */
13664 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13665 return 0;
13667 unsorted_offsets[i] = INTVAL (offset);
13668 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13669 order[0] = i;
13671 else
13672 /* Not a suitable memory address. */
13673 return 0;
13676 /* All the useful information has now been extracted from the
13677 operands into unsorted_regs and unsorted_offsets; additionally,
13678 order[0] has been set to the lowest offset in the list. Sort
13679 the offsets into order, verifying that they are adjacent, and
13680 check that the register numbers are ascending. */
13681 if (!compute_offset_order (nops, unsorted_offsets, order,
13682 check_regs ? unsorted_regs : NULL))
13683 return 0;
13685 if (saved_order)
13686 memcpy (saved_order, order, sizeof order);
13688 if (base)
13690 *base = base_reg;
13692 for (i = 0; i < nops; i++)
13693 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13695 *load_offset = unsorted_offsets[order[0]];
13698 if (TARGET_THUMB1
13699 && !peep2_reg_dead_p (nops, base_reg_rtx))
13700 return 0;
13702 if (unsorted_offsets[order[0]] == 0)
13703 ldm_case = 1; /* ldmia */
13704 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13705 ldm_case = 2; /* ldmib */
13706 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13707 ldm_case = 3; /* ldmda */
13708 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13709 ldm_case = 4; /* ldmdb */
13710 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13711 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13712 ldm_case = 5;
13713 else
13714 return 0;
13716 if (!multiple_operation_profitable_p (false, nops,
13717 ldm_case == 5
13718 ? unsorted_offsets[order[0]] : 0))
13719 return 0;
13721 return ldm_case;
13724 /* Used to determine in a peephole whether a sequence of store instructions can
13725 be changed into a store-multiple instruction.
13726 NOPS is the number of separate store instructions we are examining.
13727 NOPS_TOTAL is the total number of instructions recognized by the peephole
13728 pattern.
13729 The first NOPS entries in OPERANDS are the source registers, the next
13730 NOPS entries are memory operands. If this function is successful, *BASE is
13731 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13732 to the first memory location's offset from that base register. REGS is an
13733 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13734 likewise filled with the corresponding rtx's.
13735 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13736 numbers to an ascending order of stores.
13737 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13738 from ascending memory locations, and the function verifies that the register
13739 numbers are themselves ascending. If CHECK_REGS is false, the register
13740 numbers are stored in the order they are found in the operands. */
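/* The nonzero return value encodes which store-multiple form applies:
   1 = stmia, 2 = stmib, 3 = stmda, 4 = stmdb; 0 means no store-multiple
   can be used.  */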
13741 static int
13742 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13743 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13744 HOST_WIDE_INT *load_offset, bool check_regs)
13746 int unsorted_regs[MAX_LDM_STM_OPS];
13747 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13748 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13749 int order[MAX_LDM_STM_OPS];
13750 int base_reg = -1;
13751 rtx base_reg_rtx = NULL;
13752 int i, stm_case;
13754 /* Write back of base register is currently only supported for Thumb 1. */
13755 int base_writeback = TARGET_THUMB1;
13757 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13758 easily extended if required. */
13759 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13761 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13763 /* Loop over the operands and check that the memory references are
13764 suitable (i.e. immediate offsets from the same base register). At
13765 the same time, extract the target register, and the memory
13766 offsets. */
13767 for (i = 0; i < nops; i++)
13769 rtx reg;
13770 rtx offset;
13772 /* Convert a subreg of a mem into the mem itself. */
13773 if (GET_CODE (operands[nops + i]) == SUBREG)
13774 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13776 gcc_assert (MEM_P (operands[nops + i]));
13778 /* Don't reorder volatile memory references; it doesn't seem worth
13779 looking for the case where the order is ok anyway. */
13780 if (MEM_VOLATILE_P (operands[nops + i]))
13781 return 0;
13783 offset = const0_rtx;
13785 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13786 || (GET_CODE (reg) == SUBREG
13787 && REG_P (reg = SUBREG_REG (reg))))
13788 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13789 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13790 || (GET_CODE (reg) == SUBREG
13791 && REG_P (reg = SUBREG_REG (reg))))
13792 && (CONST_INT_P (offset
13793 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13795 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13796 ? operands[i] : SUBREG_REG (operands[i]));
13797 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13799 if (i == 0)
13801 base_reg = REGNO (reg);
13802 base_reg_rtx = reg;
13803 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13804 return 0;
13806 else if (base_reg != (int) REGNO (reg))
13807 /* Not addressed from the same base register. */
13808 return 0;
13810 /* If it isn't an integer register, then we can't do this. */
13811 if (unsorted_regs[i] < 0
13812 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13813 /* The effects are unpredictable if the base register is
13814 both updated and stored. */
13815 || (base_writeback && unsorted_regs[i] == base_reg)
13816 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13817 || unsorted_regs[i] > 14)
13818 return 0;
13820 unsorted_offsets[i] = INTVAL (offset);
13821 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13822 order[0] = i;
13824 else
13825 /* Not a suitable memory address. */
13826 return 0;
13829 /* All the useful information has now been extracted from the
13830 operands into unsorted_regs and unsorted_offsets; additionally,
13831 order[0] has been set to the lowest offset in the list. Sort
13832 the offsets into order, verifying that they are adjacent, and
13833 check that the register numbers are ascending. */
13834 if (!compute_offset_order (nops, unsorted_offsets, order,
13835 check_regs ? unsorted_regs : NULL))
13836 return 0;
13838 if (saved_order)
13839 memcpy (saved_order, order, sizeof order);
13841 if (base)
13843 *base = base_reg;
13845 for (i = 0; i < nops; i++)
13847 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13848 if (reg_rtxs)
13849 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13852 *load_offset = unsorted_offsets[order[0]];
13855 if (TARGET_THUMB1
13856 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13857 return 0;
13859 if (unsorted_offsets[order[0]] == 0)
13860 stm_case = 1; /* stmia */
13861 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13862 stm_case = 2; /* stmib */
13863 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13864 stm_case = 3; /* stmda */
13865 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13866 stm_case = 4; /* stmdb */
13867 else
13868 return 0;
13870 if (!multiple_operation_profitable_p (false, nops, 0))
13871 return 0;
13873 return stm_case;
13876 /* Routines for use in generating RTL. */
13878 /* Generate a load-multiple instruction. COUNT is the number of loads in
13879 the instruction; REGS and MEMS are arrays containing the operands.
13880 BASEREG is the base register to be used in addressing the memory operands.
13881 WBACK_OFFSET is nonzero if the instruction should update the base
13882 register. */
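/* If a load-multiple would not be profitable here (see
   multiple_operation_profitable_p), a sequence of single loads, plus an
   optional base register update, is returned instead.  */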
13884 static rtx
13885 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13886 HOST_WIDE_INT wback_offset)
13888 int i = 0, j;
13889 rtx result;
13891 if (!multiple_operation_profitable_p (false, count, 0))
13893 rtx seq;
13895 start_sequence ();
13897 for (i = 0; i < count; i++)
13898 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13900 if (wback_offset != 0)
13901 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13903 seq = get_insns ();
13904 end_sequence ();
13906 return seq;
13909 result = gen_rtx_PARALLEL (VOIDmode,
13910 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13911 if (wback_offset != 0)
13913 XVECEXP (result, 0, 0)
13914 = gen_rtx_SET (VOIDmode, basereg,
13915 plus_constant (Pmode, basereg, wback_offset));
13916 i = 1;
13917 count++;
13920 for (j = 0; i < count; i++, j++)
13921 XVECEXP (result, 0, i)
13922 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13924 return result;
13927 /* Generate a store-multiple instruction. COUNT is the number of stores in
13928 the instruction; REGS and MEMS are arrays containing the operands.
13929 BASEREG is the base register to be used in addressing the memory operands.
13930 WBACK_OFFSET is nonzero if the instruction should update the base
13931 register. */
13933 static rtx
13934 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13935 HOST_WIDE_INT wback_offset)
13937 int i = 0, j;
13938 rtx result;
13940 if (GET_CODE (basereg) == PLUS)
13941 basereg = XEXP (basereg, 0);
13943 if (!multiple_operation_profitable_p (false, count, 0))
13945 rtx seq;
13947 start_sequence ();
13949 for (i = 0; i < count; i++)
13950 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13952 if (wback_offset != 0)
13953 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13955 seq = get_insns ();
13956 end_sequence ();
13958 return seq;
13961 result = gen_rtx_PARALLEL (VOIDmode,
13962 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13963 if (wback_offset != 0)
13965 XVECEXP (result, 0, 0)
13966 = gen_rtx_SET (VOIDmode, basereg,
13967 plus_constant (Pmode, basereg, wback_offset));
13968 i = 1;
13969 count++;
13972 for (j = 0; i < count; i++, j++)
13973 XVECEXP (result, 0, i)
13974 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13976 return result;
13979 /* Generate either a load-multiple or a store-multiple instruction. This
13980 function can be used in situations where we can start with a single MEM
13981 rtx and adjust its address upwards.
13982 COUNT is the number of operations in the instruction, not counting a
13983 possible update of the base register. REGS is an array containing the
13984 register operands.
13985 BASEREG is the base register to be used in addressing the memory operands,
13986 which are constructed from BASEMEM.
13987 WRITE_BACK specifies whether the generated instruction should include an
13988 update of the base register.
13989 OFFSETP is used to pass an offset to and from this function; this offset
13990 is not used when constructing the address (instead BASEMEM should have an
13991 appropriate offset in its address); it is used only for setting
13992 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
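/* For a two-register load with write-back the result is roughly of the
   form:
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg REGS[0]) (mem basereg))
                (set (reg REGS[1]) (mem (plus basereg (const_int 4))))])
   with the MEMs derived from BASEMEM via adjust_automodify_address_nv.  */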
13994 static rtx
13995 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13996 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13998 rtx mems[MAX_LDM_STM_OPS];
13999 HOST_WIDE_INT offset = *offsetp;
14000 int i;
14002 gcc_assert (count <= MAX_LDM_STM_OPS);
14004 if (GET_CODE (basereg) == PLUS)
14005 basereg = XEXP (basereg, 0);
14007 for (i = 0; i < count; i++)
14009 rtx addr = plus_constant (Pmode, basereg, i * 4);
14010 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14011 offset += 4;
14014 if (write_back)
14015 *offsetp = offset;
14017 if (is_load)
14018 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14019 write_back ? 4 * count : 0);
14020 else
14021 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14022 write_back ? 4 * count : 0);
rtx
14026 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14027 rtx basemem, HOST_WIDE_INT *offsetp)
14029 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14030 offsetp);
rtx
14034 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14035 rtx basemem, HOST_WIDE_INT *offsetp)
14037 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14038 offsetp);
14041 /* Called from a peephole2 expander to turn a sequence of loads into an
14042 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14043 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14044 is true if we can reorder the registers because their subsequent uses are
14045 commutative.
14046 Returns true iff we could generate a new instruction. */
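/* For Thumb-1 the base register must be dead after the sequence and the
   LDM always uses write-back; for ldm_case 5 an add is emitted first to
   fold the offset into the base register.  */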
14048 bool
14049 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14051 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14052 rtx mems[MAX_LDM_STM_OPS];
14053 int i, j, base_reg;
14054 rtx base_reg_rtx;
14055 HOST_WIDE_INT offset;
14056 int write_back = FALSE;
14057 int ldm_case;
14058 rtx addr;
14060 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14061 &base_reg, &offset, !sort_regs);
14063 if (ldm_case == 0)
14064 return false;
14066 if (sort_regs)
14067 for (i = 0; i < nops - 1; i++)
14068 for (j = i + 1; j < nops; j++)
14069 if (regs[i] > regs[j])
14071 int t = regs[i];
14072 regs[i] = regs[j];
14073 regs[j] = t;
14075 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14077 if (TARGET_THUMB1)
14079 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14080 gcc_assert (ldm_case == 1 || ldm_case == 5);
14081 write_back = TRUE;
14084 if (ldm_case == 5)
14086 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14087 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14088 offset = 0;
14089 if (!TARGET_THUMB1)
14091 base_reg = regs[0];
14092 base_reg_rtx = newbase;
14096 for (i = 0; i < nops; i++)
14098 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14099 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14100 SImode, addr, 0);
14102 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14103 write_back ? offset + i * 4 : 0));
14104 return true;
14107 /* Called from a peephole2 expander to turn a sequence of stores into an
14108 STM instruction. OPERANDS are the operands found by the peephole matcher;
14109 NOPS indicates how many separate stores we are trying to combine.
14110 Returns true iff we could generate a new instruction. */
14112 bool
14113 gen_stm_seq (rtx *operands, int nops)
14115 int i;
14116 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14117 rtx mems[MAX_LDM_STM_OPS];
14118 int base_reg;
14119 rtx base_reg_rtx;
14120 HOST_WIDE_INT offset;
14121 int write_back = FALSE;
14122 int stm_case;
14123 rtx addr;
14124 bool base_reg_dies;
14126 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14127 mem_order, &base_reg, &offset, true);
14129 if (stm_case == 0)
14130 return false;
14132 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14134 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14135 if (TARGET_THUMB1)
14137 gcc_assert (base_reg_dies);
14138 write_back = TRUE;
14141 if (stm_case == 5)
14143 gcc_assert (base_reg_dies);
14144 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14145 offset = 0;
14148 addr = plus_constant (Pmode, base_reg_rtx, offset);
14150 for (i = 0; i < nops; i++)
14152 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14153 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14154 SImode, addr, 0);
14156 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14157 write_back ? offset + i * 4 : 0));
14158 return true;
14161 /* Called from a peephole2 expander to turn a sequence of stores that are
14162 preceded by constant loads into an STM instruction. OPERANDS are the
14163 operands found by the peephole matcher; NOPS indicates how many
14164 separate stores we are trying to combine; there are 2 * NOPS
14165 instructions in the peephole.
14166 Returns true iff we could generate a new instruction. */
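/* The constants are first loaded into registers (finding free registers
   where the same destination register appears more than once), the
   registers are sorted into ascending order, and a single STM is then
   emitted.  */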
14168 bool
14169 gen_const_stm_seq (rtx *operands, int nops)
14171 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14172 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14173 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14174 rtx mems[MAX_LDM_STM_OPS];
14175 int base_reg;
14176 rtx base_reg_rtx;
14177 HOST_WIDE_INT offset;
14178 int write_back = FALSE;
14179 int stm_case;
14180 rtx addr;
14181 bool base_reg_dies;
14182 int i, j;
14183 HARD_REG_SET allocated;
14185 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14186 mem_order, &base_reg, &offset, false);
14188 if (stm_case == 0)
14189 return false;
14191 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14193 /* If the same register is used more than once, try to find a free
14194 register. */
14195 CLEAR_HARD_REG_SET (allocated);
14196 for (i = 0; i < nops; i++)
14198 for (j = i + 1; j < nops; j++)
14199 if (regs[i] == regs[j])
14201 rtx t = peep2_find_free_register (0, nops * 2,
14202 TARGET_THUMB1 ? "l" : "r",
14203 SImode, &allocated);
14204 if (t == NULL_RTX)
14205 return false;
14206 reg_rtxs[i] = t;
14207 regs[i] = REGNO (t);
14211 /* Compute an ordering that maps the register numbers to an ascending
14212 sequence. */
14213 reg_order[0] = 0;
14214 for (i = 0; i < nops; i++)
14215 if (regs[i] < regs[reg_order[0]])
14216 reg_order[0] = i;
14218 for (i = 1; i < nops; i++)
14220 int this_order = reg_order[i - 1];
14221 for (j = 0; j < nops; j++)
14222 if (regs[j] > regs[reg_order[i - 1]]
14223 && (this_order == reg_order[i - 1]
14224 || regs[j] < regs[this_order]))
14225 this_order = j;
14226 reg_order[i] = this_order;
14229 /* Ensure that registers that must be live after the instruction end
14230 up with the correct value. */
14231 for (i = 0; i < nops; i++)
14233 int this_order = reg_order[i];
14234 if ((this_order != mem_order[i]
14235 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14236 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14237 return false;
14240 /* Load the constants. */
14241 for (i = 0; i < nops; i++)
14243 rtx op = operands[2 * nops + mem_order[i]];
14244 sorted_regs[i] = regs[reg_order[i]];
14245 emit_move_insn (reg_rtxs[reg_order[i]], op);
14248 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14250 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14251 if (TARGET_THUMB1)
14253 gcc_assert (base_reg_dies);
14254 write_back = TRUE;
14257 if (stm_case == 5)
14259 gcc_assert (base_reg_dies);
14260 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14261 offset = 0;
14264 addr = plus_constant (Pmode, base_reg_rtx, offset);
14266 for (i = 0; i < nops; i++)
14268 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14269 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14270 SImode, addr, 0);
14272 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14273 write_back ? offset + i * 4 : 0));
14274 return true;
14277 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14278 unaligned copies on processors which support unaligned semantics for those
14279 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14280 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14281 An interleave factor of 1 (the minimum) will perform no interleaving.
14282 Load/store multiple are used for aligned addresses where possible. */
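/* For example, with both buffers word-aligned and INTERLEAVE_FACTOR == 2,
   a 19-byte copy becomes two ldm/stm pairs of two words each, followed by
   a halfword copy and a byte copy.  */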
14284 static void
14285 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14286 HOST_WIDE_INT length,
14287 unsigned int interleave_factor)
14289 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14290 int *regnos = XALLOCAVEC (int, interleave_factor);
14291 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14292 HOST_WIDE_INT i, j;
14293 HOST_WIDE_INT remaining = length, words;
14294 rtx halfword_tmp = NULL, byte_tmp = NULL;
14295 rtx dst, src;
14296 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14297 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14298 HOST_WIDE_INT srcoffset, dstoffset;
14299 HOST_WIDE_INT src_autoinc, dst_autoinc;
14300 rtx mem, addr;
14302 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14304 /* Use hard registers if we have aligned source or destination so we can use
14305 load/store multiple with contiguous registers. */
14306 if (dst_aligned || src_aligned)
14307 for (i = 0; i < interleave_factor; i++)
14308 regs[i] = gen_rtx_REG (SImode, i);
14309 else
14310 for (i = 0; i < interleave_factor; i++)
14311 regs[i] = gen_reg_rtx (SImode);
14313 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14314 src = copy_addr_to_reg (XEXP (srcbase, 0));
14316 srcoffset = dstoffset = 0;
14318 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14319 For copying the last bytes we want to subtract this offset again. */
14320 src_autoinc = dst_autoinc = 0;
14322 for (i = 0; i < interleave_factor; i++)
14323 regnos[i] = i;
14325 /* Copy BLOCK_SIZE_BYTES chunks. */
14327 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14329 /* Load words. */
14330 if (src_aligned && interleave_factor > 1)
14332 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14333 TRUE, srcbase, &srcoffset));
14334 src_autoinc += UNITS_PER_WORD * interleave_factor;
14336 else
14338 for (j = 0; j < interleave_factor; j++)
14340 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14341 - src_autoinc));
14342 mem = adjust_automodify_address (srcbase, SImode, addr,
14343 srcoffset + j * UNITS_PER_WORD);
14344 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14346 srcoffset += block_size_bytes;
14349 /* Store words. */
14350 if (dst_aligned && interleave_factor > 1)
14352 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14353 TRUE, dstbase, &dstoffset));
14354 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14356 else
14358 for (j = 0; j < interleave_factor; j++)
14360 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14361 - dst_autoinc));
14362 mem = adjust_automodify_address (dstbase, SImode, addr,
14363 dstoffset + j * UNITS_PER_WORD);
14364 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14366 dstoffset += block_size_bytes;
14369 remaining -= block_size_bytes;
14372 /* Copy any whole words left (note these aren't interleaved with any
14373 subsequent halfword/byte load/stores in the interests of simplicity). */
14375 words = remaining / UNITS_PER_WORD;
14377 gcc_assert (words < interleave_factor);
14379 if (src_aligned && words > 1)
14381 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14382 &srcoffset));
14383 src_autoinc += UNITS_PER_WORD * words;
14385 else
14387 for (j = 0; j < words; j++)
14389 addr = plus_constant (Pmode, src,
14390 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14391 mem = adjust_automodify_address (srcbase, SImode, addr,
14392 srcoffset + j * UNITS_PER_WORD);
14393 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14395 srcoffset += words * UNITS_PER_WORD;
14398 if (dst_aligned && words > 1)
14400 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14401 &dstoffset));
14402 dst_autoinc += words * UNITS_PER_WORD;
14404 else
14406 for (j = 0; j < words; j++)
14408 addr = plus_constant (Pmode, dst,
14409 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14410 mem = adjust_automodify_address (dstbase, SImode, addr,
14411 dstoffset + j * UNITS_PER_WORD);
14412 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14414 dstoffset += words * UNITS_PER_WORD;
14417 remaining -= words * UNITS_PER_WORD;
14419 gcc_assert (remaining < 4);
14421 /* Copy a halfword if necessary. */
14423 if (remaining >= 2)
14425 halfword_tmp = gen_reg_rtx (SImode);
14427 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14428 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14429 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14431 /* Either write out immediately, or delay until we've loaded the last
14432 byte, depending on interleave factor. */
14433 if (interleave_factor == 1)
14435 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14436 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14437 emit_insn (gen_unaligned_storehi (mem,
14438 gen_lowpart (HImode, halfword_tmp)));
14439 halfword_tmp = NULL;
14440 dstoffset += 2;
14443 remaining -= 2;
14444 srcoffset += 2;
14447 gcc_assert (remaining < 2);
14449 /* Copy last byte. */
14451 if ((remaining & 1) != 0)
14453 byte_tmp = gen_reg_rtx (SImode);
14455 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14456 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14457 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14459 if (interleave_factor == 1)
14461 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14462 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14463 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14464 byte_tmp = NULL;
14465 dstoffset++;
14468 remaining--;
14469 srcoffset++;
14472 /* Store last halfword if we haven't done so already. */
14474 if (halfword_tmp)
14476 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14477 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14478 emit_insn (gen_unaligned_storehi (mem,
14479 gen_lowpart (HImode, halfword_tmp)));
14480 dstoffset += 2;
14483 /* Likewise for last byte. */
14485 if (byte_tmp)
14487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14488 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14489 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14490 dstoffset++;
14493 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14496 /* From mips_adjust_block_mem:
14498 Helper function for doing a loop-based block operation on memory
14499 reference MEM. Each iteration of the loop will operate on LENGTH
14500 bytes of MEM.
14502 Create a new base register for use within the loop and point it to
14503 the start of MEM. Create a new memory reference that uses this
14504 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14506 static void
14507 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14508 rtx *loop_mem)
14510 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14512 /* Although the new mem does not refer to a known location,
14513 it does keep up to LENGTH bytes of alignment. */
14514 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14515 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14518 /* From mips_block_move_loop:
14520 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14521 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14522 the memory regions do not overlap. */
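/* Any leftover bytes (LENGTH % BYTES_PER_ITER) are copied with a
   straight-line tail after the loop.  */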
14524 static void
14525 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14526 unsigned int interleave_factor,
14527 HOST_WIDE_INT bytes_per_iter)
14529 rtx src_reg, dest_reg, final_src, test;
14530 HOST_WIDE_INT leftover;
14532 leftover = length % bytes_per_iter;
14533 length -= leftover;
14535 /* Create registers and memory references for use within the loop. */
14536 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14537 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14539 /* Calculate the value that SRC_REG should have after the last iteration of
14540 the loop. */
14541 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14542 0, 0, OPTAB_WIDEN);
14544 /* Emit the start of the loop. */
14545 rtx_code_label *label = gen_label_rtx ();
14546 emit_label (label);
14548 /* Emit the loop body. */
14549 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14550 interleave_factor);
14552 /* Move on to the next block. */
14553 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14554 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14556 /* Emit the loop condition. */
14557 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14558 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14560 /* Mop up any left-over bytes. */
14561 if (leftover)
14562 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14565 /* Emit a block move when either the source or destination is unaligned (not
14566 aligned to a four-byte boundary). This may need further tuning depending on
14567 core type, optimize_size setting, etc. */
14569 static int
14570 arm_movmemqi_unaligned (rtx *operands)
14572 HOST_WIDE_INT length = INTVAL (operands[2]);
14574 if (optimize_size)
14576 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14577 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14578 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14579 the code size when optimizing for size. We'll use ldm/stm if src_aligned
14580 or dst_aligned though: allow more interleaving in those cases since the
14581 resulting code can be smaller. */
14582 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14583 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14585 if (length > 12)
14586 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14587 interleave_factor, bytes_per_iter);
14588 else
14589 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14590 interleave_factor);
14592 else
14594 /* Note that the loop created by arm_block_move_unaligned_loop may be
14595 subject to loop unrolling, which makes tuning this condition a little
14596 redundant. */
14597 if (length > 32)
14598 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14599 else
14600 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14603 return 1;
int
14607 arm_gen_movmemqi (rtx *operands)
14609 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14610 HOST_WIDE_INT srcoffset, dstoffset;
14611 int i;
14612 rtx src, dst, srcbase, dstbase;
14613 rtx part_bytes_reg = NULL;
14614 rtx mem;
14616 if (!CONST_INT_P (operands[2])
14617 || !CONST_INT_P (operands[3])
14618 || INTVAL (operands[2]) > 64)
14619 return 0;
14621 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14622 return arm_movmemqi_unaligned (operands);
14624 if (INTVAL (operands[3]) & 3)
14625 return 0;
14627 dstbase = operands[0];
14628 srcbase = operands[1];
14630 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14631 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14633 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14634 out_words_to_go = INTVAL (operands[2]) / 4;
14635 last_bytes = INTVAL (operands[2]) & 3;
14636 dstoffset = srcoffset = 0;
14638 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14639 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14641 for (i = 0; in_words_to_go >= 2; i+=4)
14643 if (in_words_to_go > 4)
14644 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14645 TRUE, srcbase, &srcoffset));
14646 else
14647 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14648 src, FALSE, srcbase,
14649 &srcoffset));
14651 if (out_words_to_go)
14653 if (out_words_to_go > 4)
14654 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14655 TRUE, dstbase, &dstoffset));
14656 else if (out_words_to_go != 1)
14657 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14658 out_words_to_go, dst,
14659 (last_bytes == 0
14660 ? FALSE : TRUE),
14661 dstbase, &dstoffset));
14662 else
14664 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14665 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14666 if (last_bytes != 0)
14668 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14669 dstoffset += 4;
14674 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14675 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14678 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14679 if (out_words_to_go)
14681 rtx sreg;
14683 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14684 sreg = copy_to_reg (mem);
14686 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14687 emit_move_insn (mem, sreg);
14688 in_words_to_go--;
14690 gcc_assert (!in_words_to_go); /* Sanity check */
14693 if (in_words_to_go)
14695 gcc_assert (in_words_to_go > 0);
14697 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14698 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14701 gcc_assert (!last_bytes || part_bytes_reg);
14703 if (BYTES_BIG_ENDIAN && last_bytes)
14705 rtx tmp = gen_reg_rtx (SImode);
14707 /* The bytes we want are in the top end of the word. */
14708 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14709 GEN_INT (8 * (4 - last_bytes))));
14710 part_bytes_reg = tmp;
14712 while (last_bytes)
14714 mem = adjust_automodify_address (dstbase, QImode,
14715 plus_constant (Pmode, dst,
14716 last_bytes - 1),
14717 dstoffset + last_bytes - 1);
14718 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14720 if (--last_bytes)
14722 tmp = gen_reg_rtx (SImode);
14723 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14724 part_bytes_reg = tmp;
14729 else
14731 if (last_bytes > 1)
14733 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14734 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14735 last_bytes -= 2;
14736 if (last_bytes)
14738 rtx tmp = gen_reg_rtx (SImode);
14739 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14740 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14741 part_bytes_reg = tmp;
14742 dstoffset += 2;
14746 if (last_bytes)
14748 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14749 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14753 return 1;
14756 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14757 by mode size. */
14758 inline static rtx
14759 next_consecutive_mem (rtx mem)
14761 machine_mode mode = GET_MODE (mem);
14762 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14763 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14765 return adjust_automodify_address (mem, mode, addr, offset);
14768 /* Copy using LDRD/STRD instructions whenever possible.
14769 Returns true upon success. */
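/* The copy proceeds in double-word chunks, then at most one word, one
   halfword and one byte for the remainder; when neither buffer is
   word-aligned the LDM/STM path in arm_gen_movmemqi is used instead.  */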
14770 bool
14771 gen_movmem_ldrd_strd (rtx *operands)
14773 unsigned HOST_WIDE_INT len;
14774 HOST_WIDE_INT align;
14775 rtx src, dst, base;
14776 rtx reg0;
14777 bool src_aligned, dst_aligned;
14778 bool src_volatile, dst_volatile;
14780 gcc_assert (CONST_INT_P (operands[2]));
14781 gcc_assert (CONST_INT_P (operands[3]));
14783 len = UINTVAL (operands[2]);
14784 if (len > 64)
14785 return false;
14787 /* Maximum alignment we can assume for both src and dst buffers. */
14788 align = INTVAL (operands[3]);
14790 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14791 return false;
14793 /* Place src and dst addresses in registers
14794 and update the corresponding mem rtx. */
14795 dst = operands[0];
14796 dst_volatile = MEM_VOLATILE_P (dst);
14797 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14798 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14799 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14801 src = operands[1];
14802 src_volatile = MEM_VOLATILE_P (src);
14803 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14804 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14805 src = adjust_automodify_address (src, VOIDmode, base, 0);
14807 if (!unaligned_access && !(src_aligned && dst_aligned))
14808 return false;
14810 if (src_volatile || dst_volatile)
14811 return false;
14813 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14814 if (!(dst_aligned || src_aligned))
14815 return arm_gen_movmemqi (operands);
14817 src = adjust_address (src, DImode, 0);
14818 dst = adjust_address (dst, DImode, 0);
14819 while (len >= 8)
14821 len -= 8;
14822 reg0 = gen_reg_rtx (DImode);
14823 if (src_aligned)
14824 emit_move_insn (reg0, src);
14825 else
14826 emit_insn (gen_unaligned_loaddi (reg0, src));
14828 if (dst_aligned)
14829 emit_move_insn (dst, reg0);
14830 else
14831 emit_insn (gen_unaligned_storedi (dst, reg0));
14833 src = next_consecutive_mem (src);
14834 dst = next_consecutive_mem (dst);
14837 gcc_assert (len < 8);
14838 if (len >= 4)
14840 /* More than a word but less than a double-word to copy. Copy a word. */
14841 reg0 = gen_reg_rtx (SImode);
14842 src = adjust_address (src, SImode, 0);
14843 dst = adjust_address (dst, SImode, 0);
14844 if (src_aligned)
14845 emit_move_insn (reg0, src);
14846 else
14847 emit_insn (gen_unaligned_loadsi (reg0, src));
14849 if (dst_aligned)
14850 emit_move_insn (dst, reg0);
14851 else
14852 emit_insn (gen_unaligned_storesi (dst, reg0));
14854 src = next_consecutive_mem (src);
14855 dst = next_consecutive_mem (dst);
14856 len -= 4;
14859 if (len == 0)
14860 return true;
14862 /* Copy the remaining bytes. */
14863 if (len >= 2)
14865 dst = adjust_address (dst, HImode, 0);
14866 src = adjust_address (src, HImode, 0);
14867 reg0 = gen_reg_rtx (SImode);
14868 if (src_aligned)
14869 emit_insn (gen_zero_extendhisi2 (reg0, src));
14870 else
14871 emit_insn (gen_unaligned_loadhiu (reg0, src));
14873 if (dst_aligned)
14874 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14875 else
14876 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14878 src = next_consecutive_mem (src);
14879 dst = next_consecutive_mem (dst);
14880 if (len == 2)
14881 return true;
14884 dst = adjust_address (dst, QImode, 0);
14885 src = adjust_address (src, QImode, 0);
14886 reg0 = gen_reg_rtx (QImode);
14887 emit_move_insn (reg0, src);
14888 emit_move_insn (dst, reg0);
14889 return true;
14892 /* Select a dominance comparison mode if possible for a test of the general
14893 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14894 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14895 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14896 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14897 In all cases OP will be either EQ or NE, but we don't need to know which
14898 here. If we are unable to support a dominance comparison we return
14899 CC mode. This will then fail to match for the RTL expressions that
14900 generate this call. */
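/* For example, for (ne (ior (eq x 0) (eq y 0)) (const_int 0)) with
   COND_OR == DOM_CC_X_OR_Y, both component comparisons are EQ and
   CC_DEQmode is returned.  */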
14901 machine_mode
14902 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14904 enum rtx_code cond1, cond2;
14905 int swapped = 0;
14907 /* Currently we will probably get the wrong result if the individual
14908 comparisons are not simple. This also ensures that it is safe to
14909 reverse a comparison if necessary. */
14910 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14911 != CCmode)
14912 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14913 != CCmode))
14914 return CCmode;
14916 /* The if_then_else variant of this tests the second condition if the
14917 first passes, but is true if the first fails. Reverse the first
14918 condition to get a true "inclusive-or" expression. */
14919 if (cond_or == DOM_CC_NX_OR_Y)
14920 cond1 = reverse_condition (cond1);
14922 /* If the comparisons are not equal, and one doesn't dominate the other,
14923 then we can't do this. */
14924 if (cond1 != cond2
14925 && !comparison_dominates_p (cond1, cond2)
14926 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14927 return CCmode;
14929 if (swapped)
14931 enum rtx_code temp = cond1;
14932 cond1 = cond2;
14933 cond2 = temp;
14936 switch (cond1)
14938 case EQ:
14939 if (cond_or == DOM_CC_X_AND_Y)
14940 return CC_DEQmode;
14942 switch (cond2)
14944 case EQ: return CC_DEQmode;
14945 case LE: return CC_DLEmode;
14946 case LEU: return CC_DLEUmode;
14947 case GE: return CC_DGEmode;
14948 case GEU: return CC_DGEUmode;
14949 default: gcc_unreachable ();
14952 case LT:
14953 if (cond_or == DOM_CC_X_AND_Y)
14954 return CC_DLTmode;
14956 switch (cond2)
14958 case LT:
14959 return CC_DLTmode;
14960 case LE:
14961 return CC_DLEmode;
14962 case NE:
14963 return CC_DNEmode;
14964 default:
14965 gcc_unreachable ();
14968 case GT:
14969 if (cond_or == DOM_CC_X_AND_Y)
14970 return CC_DGTmode;
14972 switch (cond2)
14974 case GT:
14975 return CC_DGTmode;
14976 case GE:
14977 return CC_DGEmode;
14978 case NE:
14979 return CC_DNEmode;
14980 default:
14981 gcc_unreachable ();
14984 case LTU:
14985 if (cond_or == DOM_CC_X_AND_Y)
14986 return CC_DLTUmode;
14988 switch (cond2)
14990 case LTU:
14991 return CC_DLTUmode;
14992 case LEU:
14993 return CC_DLEUmode;
14994 case NE:
14995 return CC_DNEmode;
14996 default:
14997 gcc_unreachable ();
15000 case GTU:
15001 if (cond_or == DOM_CC_X_AND_Y)
15002 return CC_DGTUmode;
15004 switch (cond2)
15006 case GTU:
15007 return CC_DGTUmode;
15008 case GEU:
15009 return CC_DGEUmode;
15010 case NE:
15011 return CC_DNEmode;
15012 default:
15013 gcc_unreachable ();
15016 /* The remaining cases only occur when both comparisons are the
15017 same. */
15018 case NE:
15019 gcc_assert (cond1 == cond2);
15020 return CC_DNEmode;
15022 case LE:
15023 gcc_assert (cond1 == cond2);
15024 return CC_DLEmode;
15026 case GE:
15027 gcc_assert (cond1 == cond2);
15028 return CC_DGEmode;
15030 case LEU:
15031 gcc_assert (cond1 == cond2);
15032 return CC_DLEUmode;
15034 case GEU:
15035 gcc_assert (cond1 == cond2);
15036 return CC_DGEUmode;
15038 default:
15039 gcc_unreachable ();
15043 machine_mode
15044 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15046 /* All floating point compares return CCFP if it is an equality
15047 comparison, and CCFPE otherwise. */
15048 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15050 switch (op)
15052 case EQ:
15053 case NE:
15054 case UNORDERED:
15055 case ORDERED:
15056 case UNLT:
15057 case UNLE:
15058 case UNGT:
15059 case UNGE:
15060 case UNEQ:
15061 case LTGT:
15062 return CCFPmode;
15064 case LT:
15065 case LE:
15066 case GT:
15067 case GE:
15068 return CCFPEmode;
15070 default:
15071 gcc_unreachable ();
15075 /* A compare with a shifted operand. Because of canonicalization, the
15076 comparison will have to be swapped when we emit the assembler. */
15077 if (GET_MODE (y) == SImode
15078 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15079 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15080 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15081 || GET_CODE (x) == ROTATERT))
15082 return CC_SWPmode;
15084 /* This operation is performed swapped, but since we only rely on the Z
15085 flag we don't need an additional mode. */
15086 if (GET_MODE (y) == SImode
15087 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15088 && GET_CODE (x) == NEG
15089 && (op == EQ || op == NE))
15090 return CC_Zmode;
15092 /* This is a special case that is used by combine to allow a
15093 comparison of a shifted byte load to be split into a zero-extend
15094 followed by a comparison of the shifted integer (only valid for
15095 equalities and unsigned inequalities). */
15096 if (GET_MODE (x) == SImode
15097 && GET_CODE (x) == ASHIFT
15098 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15099 && GET_CODE (XEXP (x, 0)) == SUBREG
15100 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15101 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15102 && (op == EQ || op == NE
15103 || op == GEU || op == GTU || op == LTU || op == LEU)
15104 && CONST_INT_P (y))
15105 return CC_Zmode;
15107 /* A construct for a conditional compare: if the false arm contains
15108 0, then both conditions must be true; otherwise either condition
15109 must be true. Not all conditions are possible, so CCmode is
15110 returned if it can't be done. */
15111 if (GET_CODE (x) == IF_THEN_ELSE
15112 && (XEXP (x, 2) == const0_rtx
15113 || XEXP (x, 2) == const1_rtx)
15114 && COMPARISON_P (XEXP (x, 0))
15115 && COMPARISON_P (XEXP (x, 1)))
15116 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15117 INTVAL (XEXP (x, 2)));
15119 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15120 if (GET_CODE (x) == AND
15121 && (op == EQ || op == NE)
15122 && COMPARISON_P (XEXP (x, 0))
15123 && COMPARISON_P (XEXP (x, 1)))
15124 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15125 DOM_CC_X_AND_Y);
15127 if (GET_CODE (x) == IOR
15128 && (op == EQ || op == NE)
15129 && COMPARISON_P (XEXP (x, 0))
15130 && COMPARISON_P (XEXP (x, 1)))
15131 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15132 DOM_CC_X_OR_Y);
15134 /* An operation (on Thumb) where we want to test for a single bit.
15135 This is done by shifting that bit up into the top bit of a
15136 scratch register; we can then branch on the sign bit. */
15137 if (TARGET_THUMB1
15138 && GET_MODE (x) == SImode
15139 && (op == EQ || op == NE)
15140 && GET_CODE (x) == ZERO_EXTRACT
15141 && XEXP (x, 1) == const1_rtx)
15142 return CC_Nmode;
15144 /* An operation that sets the condition codes as a side-effect; the
15145 V flag is not set correctly, so we can only use comparisons where
15146 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15147 instead.) */
15148 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15149 if (GET_MODE (x) == SImode
15150 && y == const0_rtx
15151 && (op == EQ || op == NE || op == LT || op == GE)
15152 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15153 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15154 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15155 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15156 || GET_CODE (x) == LSHIFTRT
15157 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15158 || GET_CODE (x) == ROTATERT
15159 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15160 return CC_NOOVmode;
15162 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15163 return CC_Zmode;
15165 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15166 && GET_CODE (x) == PLUS
15167 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15168 return CC_Cmode;
15170 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15172 switch (op)
15174 case EQ:
15175 case NE:
15176 /* A DImode comparison against zero can be implemented by
15177 or'ing the two halves together. */
15178 if (y == const0_rtx)
15179 return CC_Zmode;
15181 /* We can do an equality test in three Thumb instructions. */
15182 if (!TARGET_32BIT)
15183 return CC_Zmode;
15185 /* FALLTHROUGH */
15187 case LTU:
15188 case LEU:
15189 case GTU:
15190 case GEU:
15191 /* DImode unsigned comparisons can be implemented by cmp +
15192 cmpeq without a scratch register. Not worth doing in
15193 Thumb-2. */
15194 if (TARGET_32BIT)
15195 return CC_CZmode;
15197 /* FALLTHROUGH */
15199 case LT:
15200 case LE:
15201 case GT:
15202 case GE:
15203 /* DImode signed and unsigned comparisons can be implemented
15204 by cmp + sbcs with a scratch register, but that does not
15205 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15206 gcc_assert (op != EQ && op != NE);
15207 return CC_NCVmode;
15209 default:
15210 gcc_unreachable ();
15214 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15215 return GET_MODE (x);
15217 return CCmode;
15220 /* X and Y are two things to compare using CODE. Emit the compare insn and
15221 return the rtx for the CC register in the proper mode. SCRATCH is an SImode
15222 register, required after reload, that DImode comparisons may clobber. */
rtx
15224 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15226 machine_mode mode;
15227 rtx cc_reg;
15228 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15230 /* We might have X as a constant, Y as a register because of the predicates
15231 used for cmpdi. If so, force X to a register here. */
15232 if (dimode_comparison && !REG_P (x))
15233 x = force_reg (DImode, x);
15235 mode = SELECT_CC_MODE (code, x, y);
15236 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15238 if (dimode_comparison
15239 && mode != CC_CZmode)
15241 rtx clobber, set;
15243 /* To compare two non-zero values for equality, XOR them and
15244 then compare against zero. Not used for ARM mode; there
15245 CC_CZmode is cheaper. */
15246 if (mode == CC_Zmode && y != const0_rtx)
15248 gcc_assert (!reload_completed);
15249 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15250 y = const0_rtx;
15253 /* A scratch register is required. */
15254 if (reload_completed)
15255 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15256 else
15257 scratch = gen_rtx_SCRATCH (SImode);
15259 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15260 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15261 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15263 else
15264 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15266 return cc_reg;
15269 /* Generate a sequence of insns that will generate the correct return
15270 address mask depending on the physical architecture that the program
15271 is running on. */
rtx
15273 arm_gen_return_addr_mask (void)
15275 rtx reg = gen_reg_rtx (Pmode);
15277 emit_insn (gen_return_addr_mask (reg));
15278 return reg;
15281 void
15282 arm_reload_in_hi (rtx *operands)
15284 rtx ref = operands[1];
15285 rtx base, scratch;
15286 HOST_WIDE_INT offset = 0;
15288 if (GET_CODE (ref) == SUBREG)
15290 offset = SUBREG_BYTE (ref);
15291 ref = SUBREG_REG (ref);
15294 if (REG_P (ref))
15296 /* We have a pseudo which has been spilt onto the stack; there
15297 are two cases here: the first where there is a simple
15298 stack-slot replacement and a second where the stack-slot is
15299 out of range, or is used as a subreg. */
15300 if (reg_equiv_mem (REGNO (ref)))
15302 ref = reg_equiv_mem (REGNO (ref));
15303 base = find_replacement (&XEXP (ref, 0));
15305 else
15306 /* The slot is out of range, or was dressed up in a SUBREG. */
15307 base = reg_equiv_address (REGNO (ref));
15309 else
15310 base = find_replacement (&XEXP (ref, 0));
15312 /* Handle the case where the address is too complex to be offset by 1. */
15313 if (GET_CODE (base) == MINUS
15314 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15316 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15318 emit_set_insn (base_plus, base);
15319 base = base_plus;
15321 else if (GET_CODE (base) == PLUS)
15323 /* The addend must be CONST_INT, or we would have dealt with it above. */
15324 HOST_WIDE_INT hi, lo;
15326 offset += INTVAL (XEXP (base, 1));
15327 base = XEXP (base, 0);
15329 /* Rework the address into a legal sequence of insns. */
15330 /* Valid range for lo is -4095 -> 4095 */
15331 lo = (offset >= 0
15332 ? (offset & 0xfff)
15333 : -((-offset) & 0xfff));
15335 /* Corner case: if lo is the max offset then we would be out of range
15336 once we have added the additional 1 below, so bump the msb into the
15337 pre-loading insn(s). */
15338 if (lo == 4095)
15339 lo &= 0x7ff;
15341 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15342 ^ (HOST_WIDE_INT) 0x80000000)
15343 - (HOST_WIDE_INT) 0x80000000);
15345 gcc_assert (hi + lo == offset);
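/* For example, offset 0x1005 splits into hi = 0x1000 and lo = 5; only
   hi needs to be added into the base register before the byte loads.  */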
15347 if (hi != 0)
15349 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15351 /* Get the base address; addsi3 knows how to handle constants
15352 that require more than one insn. */
15353 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15354 base = base_plus;
15355 offset = lo;
15359 /* Operands[2] may overlap operands[0] (though it won't overlap
15360 operands[1]); that's why we asked for a DImode reg -- so we can
15361 use the half that does not overlap. */
15362 if (REGNO (operands[2]) == REGNO (operands[0]))
15363 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15364 else
15365 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15367 emit_insn (gen_zero_extendqisi2 (scratch,
15368 gen_rtx_MEM (QImode,
15369 plus_constant (Pmode, base,
15370 offset))));
15371 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15372 gen_rtx_MEM (QImode,
15373 plus_constant (Pmode, base,
15374 offset + 1))));
15375 if (!BYTES_BIG_ENDIAN)
15376 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15377 gen_rtx_IOR (SImode,
15378 gen_rtx_ASHIFT
15379 (SImode,
15380 gen_rtx_SUBREG (SImode, operands[0], 0),
15381 GEN_INT (8)),
15382 scratch));
15383 else
15384 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15385 gen_rtx_IOR (SImode,
15386 gen_rtx_ASHIFT (SImode, scratch,
15387 GEN_INT (8)),
15388 gen_rtx_SUBREG (SImode, operands[0], 0)));
15391 /* Handle storing a half-word to memory during reload by synthesizing as two
15392 byte stores. Take care not to clobber the input values until after we
15393 have moved them somewhere safe. This code assumes that if the DImode
15394 scratch in operands[2] overlaps either the input value or output address
15395 in some way, then that value must die in this insn (we absolutely need
15396 two scratch registers for some corner cases). */
15397 void
15398 arm_reload_out_hi (rtx *operands)
15400 rtx ref = operands[0];
15401 rtx outval = operands[1];
15402 rtx base, scratch;
15403 HOST_WIDE_INT offset = 0;
15405 if (GET_CODE (ref) == SUBREG)
15407 offset = SUBREG_BYTE (ref);
15408 ref = SUBREG_REG (ref);
15411 if (REG_P (ref))
15413 /* We have a pseudo which has been spilt onto the stack; there
15414 are two cases here: the first where there is a simple
15415 stack-slot replacement and a second where the stack-slot is
15416 out of range, or is used as a subreg. */
15417 if (reg_equiv_mem (REGNO (ref)))
15419 ref = reg_equiv_mem (REGNO (ref));
15420 base = find_replacement (&XEXP (ref, 0));
15422 else
15423 /* The slot is out of range, or was dressed up in a SUBREG. */
15424 base = reg_equiv_address (REGNO (ref));
15426 else
15427 base = find_replacement (&XEXP (ref, 0));
15429 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15431 /* Handle the case where the address is too complex to be offset by 1. */
15432 if (GET_CODE (base) == MINUS
15433 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15435 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15437 /* Be careful not to destroy OUTVAL. */
15438 if (reg_overlap_mentioned_p (base_plus, outval))
15440 /* Updating base_plus might destroy outval, see if we can
15441 swap the scratch and base_plus. */
15442 if (!reg_overlap_mentioned_p (scratch, outval))
15444 rtx tmp = scratch;
15445 scratch = base_plus;
15446 base_plus = tmp;
15448 else
15450 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15452 /* Be conservative and copy OUTVAL into the scratch now,
15453 this should only be necessary if outval is a subreg
15454 of something larger than a word. */
15455 /* XXX Might this clobber base? I can't see how it can,
15456 since scratch is known to overlap with OUTVAL, and
15457 must be wider than a word. */
15458 emit_insn (gen_movhi (scratch_hi, outval));
15459 outval = scratch_hi;
15463 emit_set_insn (base_plus, base);
15464 base = base_plus;
15466 else if (GET_CODE (base) == PLUS)
15468 /* The addend must be CONST_INT, or we would have dealt with it above. */
15469 HOST_WIDE_INT hi, lo;
15471 offset += INTVAL (XEXP (base, 1));
15472 base = XEXP (base, 0);
15474 /* Rework the address into a legal sequence of insns. */
15475 /* Valid range for lo is -4095 -> 4095 */
15476 lo = (offset >= 0
15477 ? (offset & 0xfff)
15478 : -((-offset) & 0xfff));
15480 /* Corner case: if lo is the max offset then we would be out of range
15481 once we have added the additional 1 below, so bump the msb into the
15482 pre-loading insn(s). */
15483 if (lo == 4095)
15484 lo &= 0x7ff;
15486 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15487 ^ (HOST_WIDE_INT) 0x80000000)
15488 - (HOST_WIDE_INT) 0x80000000);
15490 gcc_assert (hi + lo == offset);
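	/* Worked example of the split above, purely for illustration:
	   for offset == 4095, lo starts out as 4095, hits the corner
	   case and is masked down to 2047, and hi becomes
	   4095 - 2047 = 2048.  The addition below then raises the base
	   by 2048, and the two byte stores use offsets 2047 and 2048,
	   both inside the +/-4095 range of STRB.  */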
15492 if (hi != 0)
15494 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus, outval))
15499 /* Updating base_plus might destroy outval; see if we
15500 can swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch, outval))
15503 rtx tmp = scratch;
15504 scratch = base_plus;
15505 base_plus = tmp;
15507 else
15509 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15511 /* Be conservative and copy outval into scratch now;
15512 this should only be necessary if outval is a
15513 subreg of something larger than a word. */
15514 /* XXX Might this clobber base? I can't see how it
15515 can, since scratch is known to overlap with
15516 outval. */
15517 emit_insn (gen_movhi (scratch_hi, outval));
15518 outval = scratch_hi;
15522 /* Get the base address; addsi3 knows how to handle constants
15523 that require more than one insn. */
15524 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15525 base = base_plus;
15526 offset = lo;
15530 if (BYTES_BIG_ENDIAN)
15532 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15533 plus_constant (Pmode, base,
15534 offset + 1)),
15535 gen_lowpart (QImode, outval)));
15536 emit_insn (gen_lshrsi3 (scratch,
15537 gen_rtx_SUBREG (SImode, outval, 0),
15538 GEN_INT (8)));
15539 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15540 offset)),
15541 gen_lowpart (QImode, scratch)));
15543 else
15545 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15546 offset)),
15547 gen_lowpart (QImode, outval)));
15548 emit_insn (gen_lshrsi3 (scratch,
15549 gen_rtx_SUBREG (SImode, outval, 0),
15550 GEN_INT (8)));
15551 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15552 plus_constant (Pmode, base,
15553 offset + 1)),
15554 gen_lowpart (QImode, scratch)));
15558 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15559 (padded to the size of a word) should be passed in a register. */
15561 static bool
15562 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15564 if (TARGET_AAPCS_BASED)
15565 return must_pass_in_stack_var_size (mode, type);
15566 else
15567 return must_pass_in_stack_var_size_or_pad (mode, type);
15571 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15572 Return true if an argument passed on the stack should be padded upwards,
15573 i.e. if the least-significant byte has useful data.
15574 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15575 aggregate types are placed in the lowest memory address. */
15577 bool
15578 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15580 if (!TARGET_AAPCS_BASED)
15581 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15583 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15584 return false;
15586 return true;
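/* For instance, following the logic above: on an AAPCS big-endian
   target an integral argument such as a short that ends up on the
   stack gets FALSE here (padded downward), while a small structure
   gets TRUE (padded upward).  Non-AAPCS targets simply use the
   default padding.  */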
15590 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15591 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15592 register has useful data, and return the opposite if the most
15593 significant byte does. */
15595 bool
15596 arm_pad_reg_upward (machine_mode mode,
15597 tree type, int first ATTRIBUTE_UNUSED)
15599 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15601 /* For AAPCS, small aggregates, small fixed-point types,
15602 and small complex types are always padded upwards. */
15603 if (type)
15605 if ((AGGREGATE_TYPE_P (type)
15606 || TREE_CODE (type) == COMPLEX_TYPE
15607 || FIXED_POINT_TYPE_P (type))
15608 && int_size_in_bytes (type) <= 4)
15609 return true;
15611 else
15613 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15614 && GET_MODE_SIZE (mode) <= 4)
15615 return true;
15619 /* Otherwise, use default padding. */
15620 return !BYTES_BIG_ENDIAN;
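/* For instance: on an AAPCS big-endian target a 3-byte structure
   yields TRUE here (pad upward), whereas a plain SImode integer falls
   through to !BYTES_BIG_ENDIAN, i.e. FALSE on such a target.  */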
15623 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15624 assuming that the address in the base register is word aligned. */
15625 bool
15626 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15628 HOST_WIDE_INT max_offset;
15630 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15631 if (TARGET_THUMB2 && ((offset & 3) != 0))
15632 return false;
15634 if (TARGET_THUMB2)
15635 max_offset = 1020;
15636 else if (TARGET_ARM)
15637 max_offset = 255;
15638 else
15639 return false;
15641 return ((offset <= max_offset) && (offset >= -max_offset));
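/* A few sample offsets, for illustration only: in Thumb-2 state 1020
   and -1020 are accepted, 1021 is rejected (not a multiple of 4) and
   1024 is rejected (out of range); in ARM state 255 and -255 are
   accepted and 256 is rejected; in Thumb-1 state every offset is
   rejected, since LDRD/STRD are not available there.  */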
15644 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15645 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15646 Assumes that the address in the base register RN is word aligned. Pattern
15647 guarantees that both memory accesses use the same base register,
15648 that the offsets are constant and within range, and that the gap between them is 4.
15649 If reload has completed, check that the registers are legal. WBACK indicates whether
15650 the address is updated. LOAD indicates whether the memory access is a load or a store. */
15651 bool
15652 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15653 bool wback, bool load)
15655 unsigned int t, t2, n;
15657 if (!reload_completed)
15658 return true;
15660 if (!offset_ok_for_ldrd_strd (offset))
15661 return false;
15663 t = REGNO (rt);
15664 t2 = REGNO (rt2);
15665 n = REGNO (rn);
15667 if ((TARGET_THUMB2)
15668 && ((wback && (n == t || n == t2))
15669 || (t == SP_REGNUM)
15670 || (t == PC_REGNUM)
15671 || (t2 == SP_REGNUM)
15672 || (t2 == PC_REGNUM)
15673 || (!load && (n == PC_REGNUM))
15674 || (load && (t == t2))
15675 /* Triggers Cortex-M3 LDRD errata. */
15676 || (!wback && load && fix_cm3_ldrd && (n == t))))
15677 return false;
15679 if ((TARGET_ARM)
15680 && ((wback && (n == t || n == t2))
15681 || (t2 == PC_REGNUM)
15682 || (t % 2 != 0) /* First destination register is not even. */
15683 || (t2 != t + 1)
15684 /* PC can be used as base register (for offset addressing only),
15685 but it is deprecated. */
15686 || (n == PC_REGNUM)))
15687 return false;
15689 return true;
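/* Illustrative register choices (not mandated by anything here):
   after reload an ARM-state "ldrd r4, r5, [r2, #8]" passes the checks
   above (even/odd pair, base not PC), whereas "ldrd r5, r6, [r2, #8]"
   fails because the first destination register is odd.  In Thumb-2
   state the pair need not be consecutive, but using SP or PC, loading
   the same register twice, or combining writeback with a base that is
   also transferred is rejected.  */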
15692 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15693 operand MEM's address contains an immediate offset from the base
15694 register and has no side effects, in which case it sets BASE and
15695 OFFSET accordingly. */
15696 static bool
15697 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15699 rtx addr;
15701 gcc_assert (base != NULL && offset != NULL);
15703 /* TODO: Handle more general memory operand patterns, such as
15704 PRE_DEC and PRE_INC. */
15706 if (side_effects_p (mem))
15707 return false;
15709 /* Can't deal with subregs. */
15710 if (GET_CODE (mem) == SUBREG)
15711 return false;
15713 gcc_assert (MEM_P (mem));
15715 *offset = const0_rtx;
15717 addr = XEXP (mem, 0);
15719 /* If addr isn't valid for DImode, then we can't handle it. */
15720 if (!arm_legitimate_address_p (DImode, addr,
15721 reload_in_progress || reload_completed))
15722 return false;
15724 if (REG_P (addr))
15726 *base = addr;
15727 return true;
15729 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15731 *base = XEXP (addr, 0);
15732 *offset = XEXP (addr, 1);
15733 return (REG_P (*base) && CONST_INT_P (*offset));
15736 return false;
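/* Examples of what the helper above accepts, as RTL sketches only:
   (mem:SI (reg R)) gives base R and offset 0, and
   (mem:SI (plus (reg R) (const_int 8))) gives base R and offset 8,
   provided the address is also legitimate for DImode.  Anything with
   side effects, e.g. (mem:SI (post_inc (reg R))), is rejected for now,
   as the TODO above notes.  */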
15739 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15741 /* Called from a peephole2 to replace two word-size accesses with a
15742 single LDRD/STRD instruction. Returns true iff we can generate a
15743 new instruction sequence. That is, both accesses use the same base
15744 register and the gap between constant offsets is 4. This function
15745 may reorder its operands to match ldrd/strd RTL templates.
15746 OPERANDS are the operands found by the peephole matcher;
15747 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15748 corresponding memory operands. LOAD indicates whether the access
15749 is a load or a store. CONST_STORE indicates a store of constant
15750 integer values held in OPERANDS[4,5] and assumes that the pattern
15751 is four insns long, for the purpose of checking dead registers.
15752 COMMUTE indicates that register operands may be reordered. */
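/* In the simplest load case the peephole turns, roughly,

	ldr	r4, [r2]
	ldr	r5, [r2, #4]

   into

	ldrd	r4, r5, [r2]

   (the register numbers are only an example); most of the code below
   deals with the cases where the operands do not already line up as
   neatly as this.  */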
15753 bool
15754 gen_operands_ldrd_strd (rtx *operands, bool load,
15755 bool const_store, bool commute)
15757 int nops = 2;
15758 HOST_WIDE_INT offsets[2], offset;
15759 rtx base = NULL_RTX;
15760 rtx cur_base, cur_offset, tmp;
15761 int i, gap;
15762 HARD_REG_SET regset;
15764 gcc_assert (!const_store || !load);
15765 /* Check that the memory references are immediate offsets from the
15766 same base register. Extract the base register, the destination
15767 registers, and the corresponding memory offsets. */
15768 for (i = 0; i < nops; i++)
15770 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15771 return false;
15773 if (i == 0)
15774 base = cur_base;
15775 else if (REGNO (base) != REGNO (cur_base))
15776 return false;
15778 offsets[i] = INTVAL (cur_offset);
15779 if (GET_CODE (operands[i]) == SUBREG)
15781 tmp = SUBREG_REG (operands[i]);
15782 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15783 operands[i] = tmp;
15787 /* Make sure there is no dependency between the individual loads. */
15788 if (load && REGNO (operands[0]) == REGNO (base))
15789 return false; /* RAW */
15791 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15792 return false; /* WAW */
15794 /* If the same input register is used in both stores
15795 when storing different constants, try to find a free register.
15796 For example, the code
15797 mov r0, 0
15798 str r0, [r2]
15799 mov r0, 1
15800 str r0, [r2, #4]
15801 can be transformed into
15802 mov r1, 0
15803 strd r1, r0, [r2]
15804 in Thumb mode assuming that r1 is free. */
15805 if (const_store
15806 && REGNO (operands[0]) == REGNO (operands[1])
15807 && INTVAL (operands[4]) != INTVAL (operands[5]))
15809 if (TARGET_THUMB2)
15811 CLEAR_HARD_REG_SET (regset);
15812 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15813 if (tmp == NULL_RTX)
15814 return false;
15816 /* Use the new register in the first load to ensure that
15817 if the original input register is not dead after peephole,
15818 then it will have the correct constant value. */
15819 operands[0] = tmp;
15821 else if (TARGET_ARM)
15823 return false;
15824 int regno = REGNO (operands[0]);
15825 if (!peep2_reg_dead_p (4, operands[0]))
15827 /* When the input register is even and is not dead after the
15828 pattern, it has to hold the second constant but we cannot
15829 form a legal STRD in ARM mode with this register as the second
15830 register. */
15831 if (regno % 2 == 0)
15832 return false;
15834 /* Is regno-1 free? */
15835 SET_HARD_REG_SET (regset);
15836 CLEAR_HARD_REG_BIT(regset, regno - 1);
15837 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15838 if (tmp == NULL_RTX)
15839 return false;
15841 operands[0] = tmp;
15843 else
15845 /* Find a DImode register. */
15846 CLEAR_HARD_REG_SET (regset);
15847 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15848 if (tmp != NULL_RTX)
15850 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15851 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15853 else
15855 /* Can we use the input register to form a DI register? */
15856 SET_HARD_REG_SET (regset);
15857 CLEAR_HARD_REG_BIT(regset,
15858 regno % 2 == 0 ? regno + 1 : regno - 1);
15859 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15860 if (tmp == NULL_RTX)
15861 return false;
15862 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15866 gcc_assert (operands[0] != NULL_RTX);
15867 gcc_assert (operands[1] != NULL_RTX);
15868 gcc_assert (REGNO (operands[0]) % 2 == 0);
15869 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15873 /* Make sure the instructions are ordered with lower memory access first. */
15874 if (offsets[0] > offsets[1])
15876 gap = offsets[0] - offsets[1];
15877 offset = offsets[1];
15879 /* Swap the instructions such that lower memory is accessed first. */
15880 SWAP_RTX (operands[0], operands[1]);
15881 SWAP_RTX (operands[2], operands[3]);
15882 if (const_store)
15883 SWAP_RTX (operands[4], operands[5]);
15885 else
15887 gap = offsets[1] - offsets[0];
15888 offset = offsets[0];
15891 /* Make sure accesses are to consecutive memory locations. */
15892 if (gap != 4)
15893 return false;
15895 /* Make sure we generate legal instructions. */
15896 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15897 false, load))
15898 return true;
15900 /* In Thumb state, where registers are almost unconstrained, there
15901 is little hope to fix it. */
15902 if (TARGET_THUMB2)
15903 return false;
15905 if (load && commute)
15907 /* Try reordering registers. */
15908 SWAP_RTX (operands[0], operands[1]);
15909 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15910 false, load))
15911 return true;
15914 if (const_store)
15916 /* If input registers are dead after this pattern, they can be
15917 reordered or replaced by other registers that are free in the
15918 current pattern. */
15919 if (!peep2_reg_dead_p (4, operands[0])
15920 || !peep2_reg_dead_p (4, operands[1]))
15921 return false;
15923 /* Try to reorder the input registers. */
15924 /* For example, the code
15925 mov r0, 0
15926 mov r1, 1
15927 str r1, [r2]
15928 str r0, [r2, #4]
15929 can be transformed into
15930 mov r1, 0
15931 mov r0, 1
15932 strd r0, [r2]
15934 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15935 false, false))
15937 SWAP_RTX (operands[0], operands[1]);
15938 return true;
15941 /* Try to find a free DI register. */
15942 CLEAR_HARD_REG_SET (regset);
15943 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15944 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15945 while (true)
15947 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15948 if (tmp == NULL_RTX)
15949 return false;
15951 /* DREG must be an even-numbered register in DImode.
15952 Split it into SI registers. */
15953 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15954 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15955 gcc_assert (operands[0] != NULL_RTX);
15956 gcc_assert (operands[1] != NULL_RTX);
15957 gcc_assert (REGNO (operands[0]) % 2 == 0);
15958 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15960 return (operands_ok_ldrd_strd (operands[0], operands[1],
15961 base, offset,
15962 false, load));
15966 return false;
15968 #undef SWAP_RTX
15973 /* Print a symbolic form of X to the debug file, F. */
15974 static void
15975 arm_print_value (FILE *f, rtx x)
15977 switch (GET_CODE (x))
15979 case CONST_INT:
15980 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15981 return;
15983 case CONST_DOUBLE:
15984 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15985 return;
15987 case CONST_VECTOR:
15989 int i;
15991 fprintf (f, "<");
15992 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15994 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15995 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15996 fputc (',', f);
15998 fprintf (f, ">");
16000 return;
16002 case CONST_STRING:
16003 fprintf (f, "\"%s\"", XSTR (x, 0));
16004 return;
16006 case SYMBOL_REF:
16007 fprintf (f, "`%s'", XSTR (x, 0));
16008 return;
16010 case LABEL_REF:
16011 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16012 return;
16014 case CONST:
16015 arm_print_value (f, XEXP (x, 0));
16016 return;
16018 case PLUS:
16019 arm_print_value (f, XEXP (x, 0));
16020 fprintf (f, "+");
16021 arm_print_value (f, XEXP (x, 1));
16022 return;
16024 case PC:
16025 fprintf (f, "pc");
16026 return;
16028 default:
16029 fprintf (f, "????");
16030 return;
16034 /* Routines for manipulation of the constant pool. */
16036 /* Arm instructions cannot load a large constant directly into a
16037 register; they have to come from a pc relative load. The constant
16038 must therefore be placed in the addressable range of the pc
16039 relative load. Depending on the precise pc relative load
16040 instruction the range is somewhere between 256 bytes and 4k. This
16041 means that we often have to dump a constant inside a function, and
16042 generate code to branch around it.
16044 It is important to minimize this, since the branches will slow
16045 things down and make the code larger.
16047 Normally we can hide the table after an existing unconditional
16048 branch so that there is no interruption of the flow, but in the
16049 worst case the code looks like this:
16051 ldr rn, L1
16053 b L2
16054 align
16055 L1: .long value
16059 ldr rn, L3
16061 b L4
16062 align
16063 L3: .long value
16067 We fix this by performing a scan after scheduling, which notices
16068 which instructions need to have their operands fetched from the
16069 constant table and builds the table.
16071 The algorithm starts by building a table of all the constants that
16072 need fixing up and all the natural barriers in the function (places
16073 where a constant table can be dropped without breaking the flow).
16074 For each fixup we note how far the pc-relative replacement will be
16075 able to reach and the offset of the instruction into the function.
16077 Having built the table we then group the fixes together to form
16078 tables that are as large as possible (subject to addressing
16079 constraints) and emit each table of constants after the last
16080 barrier that is within range of all the instructions in the group.
16081 If a group does not contain a barrier, then we forcibly create one
16082 by inserting a jump instruction into the flow. Once the table has
16083 been inserted, the insns are then modified to reference the
16084 relevant entry in the pool.
16086 Possible enhancements to the algorithm (not implemented) are:
16088 1) For some processors and object formats, there may be benefit in
16089 aligning the pools to the start of cache lines; this alignment
16090 would need to be taken into account when calculating addressability
16091 of a pool. */
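/* Concretely, when a barrier has to be created the emitted code ends
   up looking roughly like this (the labels are invented for the
   example):

	ldr	rn, .LPOOL	@ fixed up to address the pool entry
	...
	b	.LSKIP		@ jump inserted by create_fix_barrier
	.align	2
   .LPOOL:
	.word	value
   .LSKIP:
	...  */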
16093 /* These typedefs are located at the start of this file, so that
16094 they can be used in the prototypes there. This comment is to
16095 remind readers of that fact so that the following structures
16096 can be understood more easily.
16098 typedef struct minipool_node Mnode;
16099 typedef struct minipool_fixup Mfix; */
16101 struct minipool_node
16103 /* Doubly linked chain of entries. */
16104 Mnode * next;
16105 Mnode * prev;
16106 /* The maximum offset into the code at which this entry can be placed. While
16107 pushing fixes for forward references, all entries are sorted in order
16108 of increasing max_address. */
16109 HOST_WIDE_INT max_address;
16110 /* Similarly for an entry inserted for a backwards ref. */
16111 HOST_WIDE_INT min_address;
16112 /* The number of fixes referencing this entry. This can become zero
16113 if we "unpush" an entry. In this case we ignore the entry when we
16114 come to emit the code. */
16115 int refcount;
16116 /* The offset from the start of the minipool. */
16117 HOST_WIDE_INT offset;
16118 /* The value in the table. */
16119 rtx value;
16120 /* The mode of value. */
16121 machine_mode mode;
16122 /* The size of the value. With iWMMXt enabled
16123 sizes > 4 also imply an alignment of 8 bytes. */
16124 int fix_size;
16127 struct minipool_fixup
16129 Mfix * next;
16130 rtx_insn * insn;
16131 HOST_WIDE_INT address;
16132 rtx * loc;
16133 machine_mode mode;
16134 int fix_size;
16135 rtx value;
16136 Mnode * minipool;
16137 HOST_WIDE_INT forwards;
16138 HOST_WIDE_INT backwards;
16141 /* Fixes less than a word need padding out to a word boundary. */
16142 #define MINIPOOL_FIX_SIZE(mode) \
16143 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
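/* So, for example, an HImode fix still occupies 4 bytes in the pool,
   an SImode fix occupies 4, and a DImode or DFmode fix occupies 8
   (and, per the comment in struct minipool_node, sizes larger than 4
   also imply 8-byte alignment when iWMMXt is enabled).  */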
16145 static Mnode * minipool_vector_head;
16146 static Mnode * minipool_vector_tail;
16147 static rtx_code_label *minipool_vector_label;
16148 static int minipool_pad;
16150 /* The linked list of all minipool fixes required for this function. */
16151 Mfix * minipool_fix_head;
16152 Mfix * minipool_fix_tail;
16153 /* The fix entry for the current minipool, once it has been placed. */
16154 Mfix * minipool_barrier;
16156 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16157 #define JUMP_TABLES_IN_TEXT_SECTION 0
16158 #endif
16160 static HOST_WIDE_INT
16161 get_jump_table_size (rtx_jump_table_data *insn)
16163 /* ADDR_VECs only take room if read-only data goes into the text
16164 section. */
16165 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16167 rtx body = PATTERN (insn);
16168 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16169 HOST_WIDE_INT size;
16170 HOST_WIDE_INT modesize;
16172 modesize = GET_MODE_SIZE (GET_MODE (body));
16173 size = modesize * XVECLEN (body, elt);
16174 switch (modesize)
16176 case 1:
16177 /* Round up size of TBB table to a halfword boundary. */
16178 size = (size + 1) & ~(HOST_WIDE_INT)1;
16179 break;
16180 case 2:
16181 /* No padding necessary for TBH. */
16182 break;
16183 case 4:
16184 /* Add two bytes for alignment on Thumb. */
16185 if (TARGET_THUMB)
16186 size += 2;
16187 break;
16188 default:
16189 gcc_unreachable ();
16191 return size;
16194 return 0;
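/* Worked sizes, for illustration: a QImode ADDR_DIFF_VEC (a TBB
   table) with 5 entries occupies 5 bytes, rounded up to 6; an HImode
   table (TBH) with 5 entries is exactly 10 bytes; an SImode table
   with 3 entries on Thumb is 12 + 2 = 14 bytes.  When jump tables are
   placed in the read-only data section instead, the function reports
   0.  */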
16197 /* Return the maximum amount of padding that will be inserted before
16198 label LABEL. */
16200 static HOST_WIDE_INT
16201 get_label_padding (rtx label)
16203 HOST_WIDE_INT align, min_insn_size;
16205 align = 1 << label_to_alignment (label);
16206 min_insn_size = TARGET_THUMB ? 2 : 4;
16207 return align > min_insn_size ? align - min_insn_size : 0;
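/* E.g. a label aligned to 1 << 3 = 8 bytes can be preceded by at most
   8 - 2 = 6 bytes of padding in Thumb state (minimum insn size 2) and
   by 8 - 4 = 4 bytes in ARM state, while a label with only word
   alignment needs no allowance at all in ARM state.  */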
16210 /* Move a minipool fix MP from its current location to before MAX_MP.
16211 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16212 constraints may need updating. */
16213 static Mnode *
16214 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16215 HOST_WIDE_INT max_address)
16217 /* The code below assumes these are different. */
16218 gcc_assert (mp != max_mp);
16220 if (max_mp == NULL)
16222 if (max_address < mp->max_address)
16223 mp->max_address = max_address;
16225 else
16227 if (max_address > max_mp->max_address - mp->fix_size)
16228 mp->max_address = max_mp->max_address - mp->fix_size;
16229 else
16230 mp->max_address = max_address;
16232 /* Unlink MP from its current position. Since max_mp is non-null,
16233 mp->prev must be non-null. */
16234 mp->prev->next = mp->next;
16235 if (mp->next != NULL)
16236 mp->next->prev = mp->prev;
16237 else
16238 minipool_vector_tail = mp->prev;
16240 /* Re-insert it before MAX_MP. */
16241 mp->next = max_mp;
16242 mp->prev = max_mp->prev;
16243 max_mp->prev = mp;
16245 if (mp->prev != NULL)
16246 mp->prev->next = mp;
16247 else
16248 minipool_vector_head = mp;
16251 /* Save the new entry. */
16252 max_mp = mp;
16254 /* Scan over the preceding entries and adjust their addresses as
16255 required. */
16256 while (mp->prev != NULL
16257 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16259 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16260 mp = mp->prev;
16263 return max_mp;
16266 /* Add a constant to the minipool for a forward reference. Returns the
16267 node added or NULL if the constant will not fit in this pool. */
16268 static Mnode *
16269 add_minipool_forward_ref (Mfix *fix)
16271 /* If set, max_mp is the first pool_entry that has a lower
16272 constraint than the one we are trying to add. */
16273 Mnode * max_mp = NULL;
16274 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16275 Mnode * mp;
16277 /* If the minipool starts before the end of FIX->INSN then this FIX
16278 cannot be placed into the current pool. Furthermore, adding the
16279 new constant pool entry may cause the pool to start FIX_SIZE bytes
16280 earlier. */
16281 if (minipool_vector_head &&
16282 (fix->address + get_attr_length (fix->insn)
16283 >= minipool_vector_head->max_address - fix->fix_size))
16284 return NULL;
16286 /* Scan the pool to see if a constant with the same value has
16287 already been added. While we are doing this, also note the
16288 location where we must insert the constant if it doesn't already
16289 exist. */
16290 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16292 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16293 && fix->mode == mp->mode
16294 && (!LABEL_P (fix->value)
16295 || (CODE_LABEL_NUMBER (fix->value)
16296 == CODE_LABEL_NUMBER (mp->value)))
16297 && rtx_equal_p (fix->value, mp->value))
16299 /* More than one fix references this entry. */
16300 mp->refcount++;
16301 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16304 /* Note the insertion point if necessary. */
16305 if (max_mp == NULL
16306 && mp->max_address > max_address)
16307 max_mp = mp;
16309 /* If we are inserting an 8-byte aligned quantity and
16310 we have not already found an insertion point, then
16311 make sure that all such 8-byte aligned quantities are
16312 placed at the start of the pool. */
16313 if (ARM_DOUBLEWORD_ALIGN
16314 && max_mp == NULL
16315 && fix->fix_size >= 8
16316 && mp->fix_size < 8)
16318 max_mp = mp;
16319 max_address = mp->max_address;
16323 /* The value is not currently in the minipool, so we need to create
16324 a new entry for it. If MAX_MP is NULL, the entry will be put on
16325 the end of the list since the placement is less constrained than
16326 any existing entry. Otherwise, we insert the new fix before
16327 MAX_MP and, if necessary, adjust the constraints on the other
16328 entries. */
16329 mp = XNEW (Mnode);
16330 mp->fix_size = fix->fix_size;
16331 mp->mode = fix->mode;
16332 mp->value = fix->value;
16333 mp->refcount = 1;
16334 /* Not yet required for a backwards ref. */
16335 mp->min_address = -65536;
16337 if (max_mp == NULL)
16339 mp->max_address = max_address;
16340 mp->next = NULL;
16341 mp->prev = minipool_vector_tail;
16343 if (mp->prev == NULL)
16345 minipool_vector_head = mp;
16346 minipool_vector_label = gen_label_rtx ();
16348 else
16349 mp->prev->next = mp;
16351 minipool_vector_tail = mp;
16353 else
16355 if (max_address > max_mp->max_address - mp->fix_size)
16356 mp->max_address = max_mp->max_address - mp->fix_size;
16357 else
16358 mp->max_address = max_address;
16360 mp->next = max_mp;
16361 mp->prev = max_mp->prev;
16362 max_mp->prev = mp;
16363 if (mp->prev != NULL)
16364 mp->prev->next = mp;
16365 else
16366 minipool_vector_head = mp;
16369 /* Save the new entry. */
16370 max_mp = mp;
16372 /* Scan over the preceding entries and adjust their addresses as
16373 required. */
16374 while (mp->prev != NULL
16375 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16377 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16378 mp = mp->prev;
16381 return max_mp;
16384 static Mnode *
16385 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16386 HOST_WIDE_INT min_address)
16388 HOST_WIDE_INT offset;
16390 /* The code below assumes these are different. */
16391 gcc_assert (mp != min_mp);
16393 if (min_mp == NULL)
16395 if (min_address > mp->min_address)
16396 mp->min_address = min_address;
16398 else
16400 /* We will adjust this below if it is too loose. */
16401 mp->min_address = min_address;
16403 /* Unlink MP from its current position. Since min_mp is non-null,
16404 mp->next must be non-null. */
16405 mp->next->prev = mp->prev;
16406 if (mp->prev != NULL)
16407 mp->prev->next = mp->next;
16408 else
16409 minipool_vector_head = mp->next;
16411 /* Reinsert it after MIN_MP. */
16412 mp->prev = min_mp;
16413 mp->next = min_mp->next;
16414 min_mp->next = mp;
16415 if (mp->next != NULL)
16416 mp->next->prev = mp;
16417 else
16418 minipool_vector_tail = mp;
16421 min_mp = mp;
16423 offset = 0;
16424 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16426 mp->offset = offset;
16427 if (mp->refcount > 0)
16428 offset += mp->fix_size;
16430 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16431 mp->next->min_address = mp->min_address + mp->fix_size;
16434 return min_mp;
16437 /* Add a constant to the minipool for a backward reference. Returns the
16438 node added or NULL if the constant will not fit in this pool.
16440 Note that the code for inserting a backwards reference can be
16441 somewhat confusing because the calculated offsets for each fix do
16442 not take into account the size of the pool (which is still under
16443 construction). */
16444 static Mnode *
16445 add_minipool_backward_ref (Mfix *fix)
16447 /* If set, min_mp is the last pool_entry that has a lower constraint
16448 than the one we are trying to add. */
16449 Mnode *min_mp = NULL;
16450 /* This can be negative, since it is only a constraint. */
16451 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16452 Mnode *mp;
16454 /* If we can't reach the current pool from this insn, or if we can't
16455 insert this entry at the end of the pool without pushing other
16456 fixes out of range, then we don't try. This ensures that we
16457 can't fail later on. */
16458 if (min_address >= minipool_barrier->address
16459 || (minipool_vector_tail->min_address + fix->fix_size
16460 >= minipool_barrier->address))
16461 return NULL;
16463 /* Scan the pool to see if a constant with the same value has
16464 already been added. While we are doing this, also note the
16465 location where we must insert the constant if it doesn't already
16466 exist. */
16467 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16469 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16470 && fix->mode == mp->mode
16471 && (!LABEL_P (fix->value)
16472 || (CODE_LABEL_NUMBER (fix->value)
16473 == CODE_LABEL_NUMBER (mp->value)))
16474 && rtx_equal_p (fix->value, mp->value)
16475 /* Check that there is enough slack to move this entry to the
16476 end of the table (this is conservative). */
16477 && (mp->max_address
16478 > (minipool_barrier->address
16479 + minipool_vector_tail->offset
16480 + minipool_vector_tail->fix_size)))
16482 mp->refcount++;
16483 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16486 if (min_mp != NULL)
16487 mp->min_address += fix->fix_size;
16488 else
16490 /* Note the insertion point if necessary. */
16491 if (mp->min_address < min_address)
16493 /* For now, we do not allow the insertion of 8-byte alignment
16494 requiring nodes anywhere but at the start of the pool. */
16495 if (ARM_DOUBLEWORD_ALIGN
16496 && fix->fix_size >= 8 && mp->fix_size < 8)
16497 return NULL;
16498 else
16499 min_mp = mp;
16501 else if (mp->max_address
16502 < minipool_barrier->address + mp->offset + fix->fix_size)
16504 /* Inserting before this entry would push the fix beyond
16505 its maximum address (which can happen if we have
16506 re-located a forwards fix); force the new fix to come
16507 after it. */
16508 if (ARM_DOUBLEWORD_ALIGN
16509 && fix->fix_size >= 8 && mp->fix_size < 8)
16510 return NULL;
16511 else
16513 min_mp = mp;
16514 min_address = mp->min_address + fix->fix_size;
16517 /* Do not insert a non-8-byte aligned quantity before 8-byte
16518 aligned quantities. */
16519 else if (ARM_DOUBLEWORD_ALIGN
16520 && fix->fix_size < 8
16521 && mp->fix_size >= 8)
16523 min_mp = mp;
16524 min_address = mp->min_address + fix->fix_size;
16529 /* We need to create a new entry. */
16530 mp = XNEW (Mnode);
16531 mp->fix_size = fix->fix_size;
16532 mp->mode = fix->mode;
16533 mp->value = fix->value;
16534 mp->refcount = 1;
16535 mp->max_address = minipool_barrier->address + 65536;
16537 mp->min_address = min_address;
16539 if (min_mp == NULL)
16541 mp->prev = NULL;
16542 mp->next = minipool_vector_head;
16544 if (mp->next == NULL)
16546 minipool_vector_tail = mp;
16547 minipool_vector_label = gen_label_rtx ();
16549 else
16550 mp->next->prev = mp;
16552 minipool_vector_head = mp;
16554 else
16556 mp->next = min_mp->next;
16557 mp->prev = min_mp;
16558 min_mp->next = mp;
16560 if (mp->next != NULL)
16561 mp->next->prev = mp;
16562 else
16563 minipool_vector_tail = mp;
16566 /* Save the new entry. */
16567 min_mp = mp;
16569 if (mp->prev)
16570 mp = mp->prev;
16571 else
16572 mp->offset = 0;
16574 /* Scan over the following entries and adjust their offsets. */
16575 while (mp->next != NULL)
16577 if (mp->next->min_address < mp->min_address + mp->fix_size)
16578 mp->next->min_address = mp->min_address + mp->fix_size;
16580 if (mp->refcount)
16581 mp->next->offset = mp->offset + mp->fix_size;
16582 else
16583 mp->next->offset = mp->offset;
16585 mp = mp->next;
16588 return min_mp;
16591 static void
16592 assign_minipool_offsets (Mfix *barrier)
16594 HOST_WIDE_INT offset = 0;
16595 Mnode *mp;
16597 minipool_barrier = barrier;
16599 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16601 mp->offset = offset;
16603 if (mp->refcount > 0)
16604 offset += mp->fix_size;
16608 /* Output the literal table. */
16609 static void
16610 dump_minipool (rtx_insn *scan)
16612 Mnode * mp;
16613 Mnode * nmp;
16614 int align64 = 0;
16616 if (ARM_DOUBLEWORD_ALIGN)
16617 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16618 if (mp->refcount > 0 && mp->fix_size >= 8)
16620 align64 = 1;
16621 break;
16624 if (dump_file)
16625 fprintf (dump_file,
16626 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16627 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16629 scan = emit_label_after (gen_label_rtx (), scan);
16630 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16631 scan = emit_label_after (minipool_vector_label, scan);
16633 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16635 if (mp->refcount > 0)
16637 if (dump_file)
16639 fprintf (dump_file,
16640 ";; Offset %u, min %ld, max %ld ",
16641 (unsigned) mp->offset, (unsigned long) mp->min_address,
16642 (unsigned long) mp->max_address);
16643 arm_print_value (dump_file, mp->value);
16644 fputc ('\n', dump_file);
16647 switch (mp->fix_size)
16649 #ifdef HAVE_consttable_1
16650 case 1:
16651 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16652 break;
16654 #endif
16655 #ifdef HAVE_consttable_2
16656 case 2:
16657 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16658 break;
16660 #endif
16661 #ifdef HAVE_consttable_4
16662 case 4:
16663 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16664 break;
16666 #endif
16667 #ifdef HAVE_consttable_8
16668 case 8:
16669 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16670 break;
16672 #endif
16673 #ifdef HAVE_consttable_16
16674 case 16:
16675 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16676 break;
16678 #endif
16679 default:
16680 gcc_unreachable ();
16684 nmp = mp->next;
16685 free (mp);
16688 minipool_vector_head = minipool_vector_tail = NULL;
16689 scan = emit_insn_after (gen_consttable_end (), scan);
16690 scan = emit_barrier_after (scan);
16693 /* Return the cost of forcibly inserting a barrier after INSN. */
16694 static int
16695 arm_barrier_cost (rtx insn)
16697 /* Basing the location of the pool on the loop depth is preferable,
16698 but at the moment, the basic block information seems to be
16699 corrupt by this stage of the compilation. */
16700 int base_cost = 50;
16701 rtx next = next_nonnote_insn (insn);
16703 if (next != NULL && LABEL_P (next))
16704 base_cost -= 20;
16706 switch (GET_CODE (insn))
16708 case CODE_LABEL:
16709 /* It will always be better to place the table before the label, rather
16710 than after it. */
16711 return 50;
16713 case INSN:
16714 case CALL_INSN:
16715 return base_cost;
16717 case JUMP_INSN:
16718 return base_cost - 10;
16720 default:
16721 return base_cost + 10;
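/* A quick illustration of the weighting: a barrier placed after an
   unconditional jump that is immediately followed by a label costs
   50 - 20 - 10 = 20, one placed after an ordinary insn costs 50, and
   splitting the stream at a CODE_LABEL always costs 50, so the caller
   (which keeps the cheapest candidate) prefers existing control-flow
   breaks.  */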
16725 /* Find the best place in the insn stream in the range
16726 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16727 Create the barrier by inserting a jump and add a new fix entry for
16728 it. */
16729 static Mfix *
16730 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16732 HOST_WIDE_INT count = 0;
16733 rtx_barrier *barrier;
16734 rtx_insn *from = fix->insn;
16735 /* The instruction after which we will insert the jump. */
16736 rtx_insn *selected = NULL;
16737 int selected_cost;
16738 /* The address at which the jump instruction will be placed. */
16739 HOST_WIDE_INT selected_address;
16740 Mfix * new_fix;
16741 HOST_WIDE_INT max_count = max_address - fix->address;
16742 rtx_code_label *label = gen_label_rtx ();
16744 selected_cost = arm_barrier_cost (from);
16745 selected_address = fix->address;
16747 while (from && count < max_count)
16749 rtx_jump_table_data *tmp;
16750 int new_cost;
16752 /* This code shouldn't have been called if there was a natural barrier
16753 within range. */
16754 gcc_assert (!BARRIER_P (from));
16756 /* Count the length of this insn. This must stay in sync with the
16757 code that pushes minipool fixes. */
16758 if (LABEL_P (from))
16759 count += get_label_padding (from);
16760 else
16761 count += get_attr_length (from);
16763 /* If there is a jump table, add its length. */
16764 if (tablejump_p (from, NULL, &tmp))
16766 count += get_jump_table_size (tmp);
16768 /* Jump tables aren't in a basic block, so base the cost on
16769 the dispatch insn. If we select this location, we will
16770 still put the pool after the table. */
16771 new_cost = arm_barrier_cost (from);
16773 if (count < max_count
16774 && (!selected || new_cost <= selected_cost))
16776 selected = tmp;
16777 selected_cost = new_cost;
16778 selected_address = fix->address + count;
16781 /* Continue after the dispatch table. */
16782 from = NEXT_INSN (tmp);
16783 continue;
16786 new_cost = arm_barrier_cost (from);
16788 if (count < max_count
16789 && (!selected || new_cost <= selected_cost))
16791 selected = from;
16792 selected_cost = new_cost;
16793 selected_address = fix->address + count;
16796 from = NEXT_INSN (from);
16799 /* Make sure that we found a place to insert the jump. */
16800 gcc_assert (selected);
16802 /* Make sure we do not split a call and its corresponding
16803 CALL_ARG_LOCATION note. */
16804 if (CALL_P (selected))
16806 rtx_insn *next = NEXT_INSN (selected);
16807 if (next && NOTE_P (next)
16808 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16809 selected = next;
16812 /* Create a new JUMP_INSN that branches around a barrier. */
16813 from = emit_jump_insn_after (gen_jump (label), selected);
16814 JUMP_LABEL (from) = label;
16815 barrier = emit_barrier_after (from);
16816 emit_label_after (label, barrier);
16818 /* Create a minipool barrier entry for the new barrier. */
16819 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16820 new_fix->insn = barrier;
16821 new_fix->address = selected_address;
16822 new_fix->next = fix->next;
16823 fix->next = new_fix;
16825 return new_fix;
16828 /* Record that there is a natural barrier in the insn stream at
16829 ADDRESS. */
16830 static void
16831 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16833 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16835 fix->insn = insn;
16836 fix->address = address;
16838 fix->next = NULL;
16839 if (minipool_fix_head != NULL)
16840 minipool_fix_tail->next = fix;
16841 else
16842 minipool_fix_head = fix;
16844 minipool_fix_tail = fix;
16847 /* Record INSN, which will need fixing up to load a value from the
16848 minipool. ADDRESS is the offset of the insn since the start of the
16849 function; LOC is a pointer to the part of the insn which requires
16850 fixing; VALUE is the constant that must be loaded, which is of type
16851 MODE. */
16852 static void
16853 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16854 machine_mode mode, rtx value)
16856 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16858 fix->insn = insn;
16859 fix->address = address;
16860 fix->loc = loc;
16861 fix->mode = mode;
16862 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16863 fix->value = value;
16864 fix->forwards = get_attr_pool_range (insn);
16865 fix->backwards = get_attr_neg_pool_range (insn);
16866 fix->minipool = NULL;
16868 /* If an insn doesn't have a range defined for it, then it isn't
16869 expecting to be reworked by this code. Better to stop now than
16870 to generate duff assembly code. */
16871 gcc_assert (fix->forwards || fix->backwards);
16873 /* If an entry requires 8-byte alignment then assume all constant pools
16874 require 4 bytes of padding. Trying to do this later on a per-pool
16875 basis is awkward because existing pool entries have to be modified. */
16876 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16877 minipool_pad = 4;
16879 if (dump_file)
16881 fprintf (dump_file,
16882 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16883 GET_MODE_NAME (mode),
16884 INSN_UID (insn), (unsigned long) address,
16885 -1 * (long)fix->backwards, (long)fix->forwards);
16886 arm_print_value (dump_file, fix->value);
16887 fprintf (dump_file, "\n");
16890 /* Add it to the chain of fixes. */
16891 fix->next = NULL;
16893 if (minipool_fix_head != NULL)
16894 minipool_fix_tail->next = fix;
16895 else
16896 minipool_fix_head = fix;
16898 minipool_fix_tail = fix;
16901 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16902 Returns the number of insns needed, or 99 if we always want to synthesize
16903 the value. */
16905 arm_max_const_double_inline_cost ()
16907 /* Let the value get synthesized to avoid the use of literal pools. */
16908 if (arm_disable_literal_pool)
16909 return 99;
16911 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16914 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16915 Returns the number of insns needed, or 99 if we don't know how to
16916 do it. */
16918 arm_const_double_inline_cost (rtx val)
16920 rtx lowpart, highpart;
16921 machine_mode mode;
16923 mode = GET_MODE (val);
16925 if (mode == VOIDmode)
16926 mode = DImode;
16928 gcc_assert (GET_MODE_SIZE (mode) == 8);
16930 lowpart = gen_lowpart (SImode, val);
16931 highpart = gen_highpart_mode (SImode, mode, val);
16933 gcc_assert (CONST_INT_P (lowpart));
16934 gcc_assert (CONST_INT_P (highpart));
16936 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16937 NULL_RTX, NULL_RTX, 0, 0)
16938 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16939 NULL_RTX, NULL_RTX, 0, 0));
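/* A quick check of the arithmetic, with values chosen purely for
   illustration: for the DImode constant 0x0000001200000034 both the
   low part 0x34 and the high part 0x12 are valid ARM immediates, so
   the cost is 1 + 1 = 2.  That is below the limit returned by
   arm_max_const_double_inline_cost, so such a constant can be
   synthesized inline rather than placed in a literal pool.  */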
16942 /* Cost of loading a SImode constant. */
16943 static inline int
16944 arm_const_inline_cost (enum rtx_code code, rtx val)
16946 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16947 NULL_RTX, NULL_RTX, 1, 0);
16950 /* Return true if it is worthwhile to split a 64-bit constant into two
16951 32-bit operations. This is the case if optimizing for size, or
16952 if we have load delay slots, or if one 32-bit part can be done with
16953 a single data operation. */
16954 bool
16955 arm_const_double_by_parts (rtx val)
16957 machine_mode mode = GET_MODE (val);
16958 rtx part;
16960 if (optimize_size || arm_ld_sched)
16961 return true;
16963 if (mode == VOIDmode)
16964 mode = DImode;
16966 part = gen_highpart_mode (SImode, mode, val);
16968 gcc_assert (CONST_INT_P (part));
16970 if (const_ok_for_arm (INTVAL (part))
16971 || const_ok_for_arm (~INTVAL (part)))
16972 return true;
16974 part = gen_lowpart (SImode, val);
16976 gcc_assert (CONST_INT_P (part));
16978 if (const_ok_for_arm (INTVAL (part))
16979 || const_ok_for_arm (~INTVAL (part)))
16980 return true;
16982 return false;
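/* For example, 0xff00000000000001 qualifies because its high part
   0xff000000 is a valid ARM immediate (and the low part 0x1 is too),
   whereas something like 0x0001234500012345 does not, unless we are
   optimizing for size or the core has load delay slots, in which case
   the function returns true unconditionally.  */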
16985 /* Return true if it is possible to inline both the high and low parts
16986 of a 64-bit constant into 32-bit data processing instructions. */
16987 bool
16988 arm_const_double_by_immediates (rtx val)
16990 machine_mode mode = GET_MODE (val);
16991 rtx part;
16993 if (mode == VOIDmode)
16994 mode = DImode;
16996 part = gen_highpart_mode (SImode, mode, val);
16998 gcc_assert (CONST_INT_P (part));
17000 if (!const_ok_for_arm (INTVAL (part)))
17001 return false;
17003 part = gen_lowpart (SImode, val);
17005 gcc_assert (CONST_INT_P (part));
17007 if (!const_ok_for_arm (INTVAL (part)))
17008 return false;
17010 return true;
17013 /* Scan INSN and note any of its operands that need fixing.
17014 If DO_PUSHES is false we do not actually push any of the fixups
17015 needed. */
17016 static void
17017 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17019 int opno;
17021 extract_constrain_insn (insn);
17023 if (recog_data.n_alternatives == 0)
17024 return;
17026 /* Fill in recog_op_alt with information about the constraints of
17027 this insn. */
17028 preprocess_constraints (insn);
17030 const operand_alternative *op_alt = which_op_alt ();
17031 for (opno = 0; opno < recog_data.n_operands; opno++)
17033 /* Things we need to fix can only occur in inputs. */
17034 if (recog_data.operand_type[opno] != OP_IN)
17035 continue;
17037 /* If this alternative is a memory reference, then any mention
17038 of constants in this alternative is really to fool reload
17039 into allowing us to accept one there. We need to fix them up
17040 now so that we output the right code. */
17041 if (op_alt[opno].memory_ok)
17043 rtx op = recog_data.operand[opno];
17045 if (CONSTANT_P (op))
17047 if (do_pushes)
17048 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17049 recog_data.operand_mode[opno], op);
17051 else if (MEM_P (op)
17052 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17053 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17055 if (do_pushes)
17057 rtx cop = avoid_constant_pool_reference (op);
17059 /* Casting the address of something to a mode narrower
17060 than a word can cause avoid_constant_pool_reference()
17061 to return the pool reference itself. That's no good to
17062 us here. Let's just hope that we can use the
17063 constant pool value directly. */
17064 if (op == cop)
17065 cop = get_pool_constant (XEXP (op, 0));
17067 push_minipool_fix (insn, address,
17068 recog_data.operand_loc[opno],
17069 recog_data.operand_mode[opno], cop);
17076 return;
17079 /* Rewrite move insn into subtract of 0 if the condition codes will
17080 be useful in the next conditional jump insn. */
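/* Roughly speaking (register numbers and exact mnemonics are only
   illustrative), a Thumb-1 sequence such as

	mov	r1, r2
	...
	cmp	r1, #0
	beq	.L1

   can become

	subs	r1, r2, #0
	...
	beq	.L1

   because the rewritten move already sets the flags the branch needs,
   allowing the explicit compare to be dropped later on.  */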
17082 static void
17083 thumb1_reorg (void)
17085 basic_block bb;
17087 FOR_EACH_BB_FN (bb, cfun)
17089 rtx dest, src;
17090 rtx pat, op0, set = NULL;
17091 rtx_insn *prev, *insn = BB_END (bb);
17092 bool insn_clobbered = false;
17094 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17095 insn = PREV_INSN (insn);
17097 /* Find the last cbranchsi4_insn in basic block BB. */
17098 if (insn == BB_HEAD (bb)
17099 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17100 continue;
17102 /* Get the register with which we are comparing. */
17103 pat = PATTERN (insn);
17104 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17106 /* Find the first flag setting insn before INSN in basic block BB. */
17107 gcc_assert (insn != BB_HEAD (bb));
17108 for (prev = PREV_INSN (insn);
17109 (!insn_clobbered
17110 && prev != BB_HEAD (bb)
17111 && (NOTE_P (prev)
17112 || DEBUG_INSN_P (prev)
17113 || ((set = single_set (prev)) != NULL
17114 && get_attr_conds (prev) == CONDS_NOCOND)));
17115 prev = PREV_INSN (prev))
17117 if (reg_set_p (op0, prev))
17118 insn_clobbered = true;
17121 /* Skip if op0 is clobbered by an insn other than prev. */
17122 if (insn_clobbered)
17123 continue;
17125 if (!set)
17126 continue;
17128 dest = SET_DEST (set);
17129 src = SET_SRC (set);
17130 if (!low_register_operand (dest, SImode)
17131 || !low_register_operand (src, SImode))
17132 continue;
17134 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17135 in INSN. Both src and dest of the move insn are checked. */
17136 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17138 dest = copy_rtx (dest);
17139 src = copy_rtx (src);
17140 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17141 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17142 INSN_CODE (prev) = -1;
17143 /* Set test register in INSN to dest. */
17144 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17145 INSN_CODE (insn) = -1;
17150 /* Convert instructions to their cc-clobbering variant if possible, since
17151 that allows us to use smaller encodings. */
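/* For example, a plain three-register add

	(set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   normally needs the 32-bit "add.w r0, r1, r2" encoding, but when the
   condition flags are dead at that point the loop below wraps the SET
   in a PARALLEL with a CC clobber, letting it match the flag-setting
   pattern and be emitted as the 16-bit "adds r0, r1, r2" (register
   numbers purely illustrative).  */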
17153 static void
17154 thumb2_reorg (void)
17156 basic_block bb;
17157 regset_head live;
17159 INIT_REG_SET (&live);
17161 /* We are freeing block_for_insn in the toplev to keep compatibility
17162 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17163 compute_bb_for_insn ();
17164 df_analyze ();
17166 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17168 FOR_EACH_BB_FN (bb, cfun)
17170 if (current_tune->disparage_flag_setting_t16_encodings
17171 && optimize_bb_for_speed_p (bb))
17172 continue;
17174 rtx_insn *insn;
17175 Convert_Action action = SKIP;
17176 Convert_Action action_for_partial_flag_setting
17177 = (current_tune->disparage_partial_flag_setting_t16_encodings
17178 && optimize_bb_for_speed_p (bb))
17179 ? SKIP : CONV;
17181 COPY_REG_SET (&live, DF_LR_OUT (bb));
17182 df_simulate_initialize_backwards (bb, &live);
17183 FOR_BB_INSNS_REVERSE (bb, insn)
17185 if (NONJUMP_INSN_P (insn)
17186 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17187 && GET_CODE (PATTERN (insn)) == SET)
17189 action = SKIP;
17190 rtx pat = PATTERN (insn);
17191 rtx dst = XEXP (pat, 0);
17192 rtx src = XEXP (pat, 1);
17193 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17195 if (!OBJECT_P (src))
17196 op0 = XEXP (src, 0);
17198 if (BINARY_P (src))
17199 op1 = XEXP (src, 1);
17201 if (low_register_operand (dst, SImode))
17203 switch (GET_CODE (src))
17205 case PLUS:
17206 /* Adding two registers and storing the result
17207 in the first source is already a 16-bit
17208 operation. */
17209 if (rtx_equal_p (dst, op0)
17210 && register_operand (op1, SImode))
17211 break;
17213 if (low_register_operand (op0, SImode))
17215 /* ADDS <Rd>,<Rn>,<Rm> */
17216 if (low_register_operand (op1, SImode))
17217 action = CONV;
17218 /* ADDS <Rdn>,#<imm8> */
17219 /* SUBS <Rdn>,#<imm8> */
17220 else if (rtx_equal_p (dst, op0)
17221 && CONST_INT_P (op1)
17222 && IN_RANGE (INTVAL (op1), -255, 255))
17223 action = CONV;
17224 /* ADDS <Rd>,<Rn>,#<imm3> */
17225 /* SUBS <Rd>,<Rn>,#<imm3> */
17226 else if (CONST_INT_P (op1)
17227 && IN_RANGE (INTVAL (op1), -7, 7))
17228 action = CONV;
17230 /* ADCS <Rd>, <Rn> */
17231 else if (GET_CODE (XEXP (src, 0)) == PLUS
17232 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17233 && low_register_operand (XEXP (XEXP (src, 0), 1),
17234 SImode)
17235 && COMPARISON_P (op1)
17236 && cc_register (XEXP (op1, 0), VOIDmode)
17237 && maybe_get_arm_condition_code (op1) == ARM_CS
17238 && XEXP (op1, 1) == const0_rtx)
17239 action = CONV;
17240 break;
17242 case MINUS:
17243 /* RSBS <Rd>,<Rn>,#0
17244 Not handled here: see NEG below. */
17245 /* SUBS <Rd>,<Rn>,#<imm3>
17246 SUBS <Rdn>,#<imm8>
17247 Not handled here: see PLUS above. */
17248 /* SUBS <Rd>,<Rn>,<Rm> */
17249 if (low_register_operand (op0, SImode)
17250 && low_register_operand (op1, SImode))
17251 action = CONV;
17252 break;
17254 case MULT:
17255 /* MULS <Rdm>,<Rn>,<Rdm>
17256 As an exception to the rule, this is only used
17257 when optimizing for size since MULS is slow on all
17258 known implementations. We do not even want to use
17259 MULS in cold code, if optimizing for speed, so we
17260 test the global flag here. */
17261 if (!optimize_size)
17262 break;
17263 /* else fall through. */
17264 case AND:
17265 case IOR:
17266 case XOR:
17267 /* ANDS <Rdn>,<Rm> */
17268 if (rtx_equal_p (dst, op0)
17269 && low_register_operand (op1, SImode))
17270 action = action_for_partial_flag_setting;
17271 else if (rtx_equal_p (dst, op1)
17272 && low_register_operand (op0, SImode))
17273 action = action_for_partial_flag_setting == SKIP
17274 ? SKIP : SWAP_CONV;
17275 break;
17277 case ASHIFTRT:
17278 case ASHIFT:
17279 case LSHIFTRT:
17280 /* ASRS <Rdn>,<Rm> */
17281 /* LSRS <Rdn>,<Rm> */
17282 /* LSLS <Rdn>,<Rm> */
17283 if (rtx_equal_p (dst, op0)
17284 && low_register_operand (op1, SImode))
17285 action = action_for_partial_flag_setting;
17286 /* ASRS <Rd>,<Rm>,#<imm5> */
17287 /* LSRS <Rd>,<Rm>,#<imm5> */
17288 /* LSLS <Rd>,<Rm>,#<imm5> */
17289 else if (low_register_operand (op0, SImode)
17290 && CONST_INT_P (op1)
17291 && IN_RANGE (INTVAL (op1), 0, 31))
17292 action = action_for_partial_flag_setting;
17293 break;
17295 case ROTATERT:
17296 /* RORS <Rdn>,<Rm> */
17297 if (rtx_equal_p (dst, op0)
17298 && low_register_operand (op1, SImode))
17299 action = action_for_partial_flag_setting;
17300 break;
17302 case NOT:
17303 /* MVNS <Rd>,<Rm> */
17304 if (low_register_operand (op0, SImode))
17305 action = action_for_partial_flag_setting;
17306 break;
17308 case NEG:
17309 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17310 if (low_register_operand (op0, SImode))
17311 action = CONV;
17312 break;
17314 case CONST_INT:
17315 /* MOVS <Rd>,#<imm8> */
17316 if (CONST_INT_P (src)
17317 && IN_RANGE (INTVAL (src), 0, 255))
17318 action = action_for_partial_flag_setting;
17319 break;
17321 case REG:
17322 /* MOVS and MOV<c> with registers have different
17323 encodings, so are not relevant here. */
17324 break;
17326 default:
17327 break;
17331 if (action != SKIP)
17333 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17334 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17335 rtvec vec;
17337 if (action == SWAP_CONV)
17339 src = copy_rtx (src);
17340 XEXP (src, 0) = op1;
17341 XEXP (src, 1) = op0;
17342 pat = gen_rtx_SET (VOIDmode, dst, src);
17343 vec = gen_rtvec (2, pat, clobber);
17345 else /* action == CONV */
17346 vec = gen_rtvec (2, pat, clobber);
17348 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17349 INSN_CODE (insn) = -1;
17353 if (NONDEBUG_INSN_P (insn))
17354 df_simulate_one_insn_backwards (bb, insn, &live);
17358 CLEAR_REG_SET (&live);
17361 /* GCC puts the pool in the wrong place for ARM, since we can only
17362 load addresses a limited distance around the pc. We do some
17363 special munging to move the constant pool values to the correct
17364 point in the code. */
17365 static void
17366 arm_reorg (void)
17368 rtx_insn *insn;
17369 HOST_WIDE_INT address = 0;
17370 Mfix * fix;
17372 if (TARGET_THUMB1)
17373 thumb1_reorg ();
17374 else if (TARGET_THUMB2)
17375 thumb2_reorg ();
17377 /* Ensure all insns that must be split have been split at this point.
17378 Otherwise, the pool placement code below may compute incorrect
17379 insn lengths. Note that when optimizing, all insns have already
17380 been split at this point. */
17381 if (!optimize)
17382 split_all_insns_noflow ();
17384 minipool_fix_head = minipool_fix_tail = NULL;
17386 /* The first insn must always be a note, or the code below won't
17387 scan it properly. */
17388 insn = get_insns ();
17389 gcc_assert (NOTE_P (insn));
17390 minipool_pad = 0;
17392 /* Scan all the insns and record the operands that will need fixing. */
17393 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17395 if (BARRIER_P (insn))
17396 push_minipool_barrier (insn, address);
17397 else if (INSN_P (insn))
17399 rtx_jump_table_data *table;
17401 note_invalid_constants (insn, address, true);
17402 address += get_attr_length (insn);
17404 /* If the insn is a vector jump, add the size of the table
17405 and skip the table. */
17406 if (tablejump_p (insn, NULL, &table))
17408 address += get_jump_table_size (table);
17409 insn = table;
17412 else if (LABEL_P (insn))
17413 /* Add the worst-case padding due to alignment. We don't add
17414 the _current_ padding because the minipool insertions
17415 themselves might change it. */
17416 address += get_label_padding (insn);
17419 fix = minipool_fix_head;
17421 /* Now scan the fixups and perform the required changes. */
17422 while (fix)
17424 Mfix * ftmp;
17425 Mfix * fdel;
17426 Mfix * last_added_fix;
17427 Mfix * last_barrier = NULL;
17428 Mfix * this_fix;
17430 /* Skip any further barriers before the next fix. */
17431 while (fix && BARRIER_P (fix->insn))
17432 fix = fix->next;
17434 /* No more fixes. */
17435 if (fix == NULL)
17436 break;
17438 last_added_fix = NULL;
17440 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17442 if (BARRIER_P (ftmp->insn))
17444 if (ftmp->address >= minipool_vector_head->max_address)
17445 break;
17447 last_barrier = ftmp;
17449 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17450 break;
17452 last_added_fix = ftmp; /* Keep track of the last fix added. */
17455 /* If we found a barrier, drop back to that; any fixes that we
17456 could have reached but come after the barrier will now go in
17457 the next mini-pool. */
17458 if (last_barrier != NULL)
17460 /* Reduce the refcount for those fixes that won't go into this
17461 pool after all. */
17462 for (fdel = last_barrier->next;
17463 fdel && fdel != ftmp;
17464 fdel = fdel->next)
17466 fdel->minipool->refcount--;
17467 fdel->minipool = NULL;
17470 ftmp = last_barrier;
17472 else
17474 /* ftmp is the first fix that we can't fit into this pool and
17475 there are no natural barriers that we could use. Insert a
17476 new barrier in the code somewhere between the previous
17477 fix and this one, and arrange to jump around it. */
17478 HOST_WIDE_INT max_address;
17480 /* The last item on the list of fixes must be a barrier, so
17481 we can never run off the end of the list of fixes without
17482 last_barrier being set. */
17483 gcc_assert (ftmp);
17485 max_address = minipool_vector_head->max_address;
17486 /* Check that there isn't another fix that is in range that
17487 we couldn't fit into this pool because the pool was
17488 already too large: we need to put the pool before such an
17489 instruction. The pool itself may come just after the
17490 fix because create_fix_barrier also allows space for a
17491 jump instruction. */
17492 if (ftmp->address < max_address)
17493 max_address = ftmp->address + 1;
17495 last_barrier = create_fix_barrier (last_added_fix, max_address);
17498 assign_minipool_offsets (last_barrier);
17500 while (ftmp)
17502 if (!BARRIER_P (ftmp->insn)
17503 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17504 == NULL))
17505 break;
17507 ftmp = ftmp->next;
17510 /* Scan over the fixes we have identified for this pool, fixing them
17511 up and adding the constants to the pool itself. */
17512 for (this_fix = fix; this_fix && ftmp != this_fix;
17513 this_fix = this_fix->next)
17514 if (!BARRIER_P (this_fix->insn))
17516 rtx addr
17517 = plus_constant (Pmode,
17518 gen_rtx_LABEL_REF (VOIDmode,
17519 minipool_vector_label),
17520 this_fix->minipool->offset);
17521 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17524 dump_minipool (last_barrier->insn);
17525 fix = ftmp;
17528 /* From now on we must synthesize any constants that we can't handle
17529 directly. This can happen if the RTL gets split during final
17530 instruction generation. */
17531 cfun->machine->after_arm_reorg = 1;
17533 /* Free the minipool memory. */
17534 obstack_free (&minipool_obstack, minipool_startobj);
17537 /* Routines to output assembly language. */
17539 /* Return string representation of passed in real value. */
17540 static const char *
17541 fp_const_from_val (REAL_VALUE_TYPE *r)
17543 if (!fp_consts_inited)
17544 init_fp_table ();
17546 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17547 return "0";
17550 /* OPERANDS[0] is the entire list of insns that constitute pop,
17551 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17552 is in the list, UPDATE is true iff the list contains explicit
17553 update of base register. */
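/* Illustrative examples, assuming unified syntax: with SP as the base
   register this prints something like
	pop	{r4, r5, r6}
   while a non-SP base such as r7 with writeback gives
	ldmia	r7!, {r4, r5}  */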
17554 void
17555 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17556 bool update)
17558 int i;
17559 char pattern[100];
17560 int offset;
17561 const char *conditional;
17562 int num_saves = XVECLEN (operands[0], 0);
17563 unsigned int regno;
17564 unsigned int regno_base = REGNO (operands[1]);
17566 offset = 0;
17567 offset += update ? 1 : 0;
17568 offset += return_pc ? 1 : 0;
17570 /* Is the base register in the list? */
17571 for (i = offset; i < num_saves; i++)
17573 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17574 /* If SP is in the list, then the base register must be SP. */
17575 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17576 /* If base register is in the list, there must be no explicit update. */
17577 if (regno == regno_base)
17578 gcc_assert (!update);
17581 conditional = reverse ? "%?%D0" : "%?%d0";
17582 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17584 /* Output pop (not stmfd) because it has a shorter encoding. */
17585 gcc_assert (update);
17586 sprintf (pattern, "pop%s\t{", conditional);
17588 else
17590 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17591 It's just a convention; their semantics are identical.  */
17592 if (regno_base == SP_REGNUM)
17593 sprintf (pattern, "ldm%sfd\t", conditional);
17594 else if (TARGET_UNIFIED_ASM)
17595 sprintf (pattern, "ldmia%s\t", conditional);
17596 else
17597 sprintf (pattern, "ldm%sia\t", conditional);
17599 strcat (pattern, reg_names[regno_base]);
17600 if (update)
17601 strcat (pattern, "!, {");
17602 else
17603 strcat (pattern, ", {");
17606 /* Output the first destination register. */
17607 strcat (pattern,
17608 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17610 /* Output the rest of the destination registers. */
17611 for (i = offset + 1; i < num_saves; i++)
17613 strcat (pattern, ", ");
17614 strcat (pattern,
17615 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17618 strcat (pattern, "}");
17620 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17621 strcat (pattern, "^");
17623 output_asm_insn (pattern, &cond);
17627 /* Output the assembly for a store multiple. */
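/* Illustrative examples: a store through SP prints something like
	vpush.64	{d8, d9, d10}
   while any other base register uses the writeback form, e.g.
	vstmdb.64	r4!, {d8, d9}  */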
17629 const char *
17630 vfp_output_vstmd (rtx * operands)
17632 char pattern[100];
17633 int p;
17634 int base;
17635 int i;
17636 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17637 ? XEXP (operands[0], 0)
17638 : XEXP (XEXP (operands[0], 0), 0);
17639 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17641 if (push_p)
17642 strcpy (pattern, "vpush%?.64\t{%P1");
17643 else
17644 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17646 p = strlen (pattern);
17648 gcc_assert (REG_P (operands[1]));
17650 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17651 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17653 p += sprintf (&pattern[p], ", d%d", base + i);
17655 strcpy (&pattern[p], "}");
17657 output_asm_insn (pattern, operands);
17658 return "";
17662 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17663 number of bytes pushed. */
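/* Descriptive note: the generated RTL is a single store-multiple that
   pre-modifies the stack pointer by -COUNT * 8 bytes, with a
   REG_FRAME_RELATED_EXPR note spelling out the SP adjustment and each
   individual DFmode store so the unwinder records the saves correctly.  */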
17665 static int
17666 vfp_emit_fstmd (int base_reg, int count)
17668 rtx par;
17669 rtx dwarf;
17670 rtx tmp, reg;
17671 int i;
17673 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17674 register pairs are stored by a store multiple insn. We avoid this
17675 by pushing an extra pair. */
17676 if (count == 2 && !arm_arch6)
17678 if (base_reg == LAST_VFP_REGNUM - 3)
17679 base_reg -= 2;
17680 count++;
17683 /* FSTMD may not store more than 16 doubleword registers at once. Split
17684 larger stores into multiple parts (up to a maximum of two, in
17685 practice). */
17686 if (count > 16)
17688 int saved;
17689 /* NOTE: base_reg is an internal register number, so each D register
17690 counts as 2. */
17691 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17692 saved += vfp_emit_fstmd (base_reg, 16);
17693 return saved;
17696 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17697 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17699 reg = gen_rtx_REG (DFmode, base_reg);
17700 base_reg += 2;
17702 XVECEXP (par, 0, 0)
17703 = gen_rtx_SET (VOIDmode,
17704 gen_frame_mem
17705 (BLKmode,
17706 gen_rtx_PRE_MODIFY (Pmode,
17707 stack_pointer_rtx,
17708 plus_constant
17709 (Pmode, stack_pointer_rtx,
17710 - (count * 8)))
17712 gen_rtx_UNSPEC (BLKmode,
17713 gen_rtvec (1, reg),
17714 UNSPEC_PUSH_MULT));
17716 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17717 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17718 RTX_FRAME_RELATED_P (tmp) = 1;
17719 XVECEXP (dwarf, 0, 0) = tmp;
17721 tmp = gen_rtx_SET (VOIDmode,
17722 gen_frame_mem (DFmode, stack_pointer_rtx),
17723 reg);
17724 RTX_FRAME_RELATED_P (tmp) = 1;
17725 XVECEXP (dwarf, 0, 1) = tmp;
17727 for (i = 1; i < count; i++)
17729 reg = gen_rtx_REG (DFmode, base_reg);
17730 base_reg += 2;
17731 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17733 tmp = gen_rtx_SET (VOIDmode,
17734 gen_frame_mem (DFmode,
17735 plus_constant (Pmode,
17736 stack_pointer_rtx,
17737 i * 8)),
17738 reg);
17739 RTX_FRAME_RELATED_P (tmp) = 1;
17740 XVECEXP (dwarf, 0, i + 1) = tmp;
17743 par = emit_insn (par);
17744 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17745 RTX_FRAME_RELATED_P (par) = 1;
17747 return count * 8;
17750 /* Emit a call instruction with pattern PAT. ADDR is the address of
17751 the call target. */
17753 void
17754 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17756 rtx insn;
17758 insn = emit_call_insn (pat);
17760 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17761 If the call might use such an entry, add a use of the PIC register
17762 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17763 if (TARGET_VXWORKS_RTP
17764 && flag_pic
17765 && !sibcall
17766 && GET_CODE (addr) == SYMBOL_REF
17767 && (SYMBOL_REF_DECL (addr)
17768 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17769 : !SYMBOL_REF_LOCAL_P (addr)))
17771 require_pic_register ();
17772 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17775 if (TARGET_AAPCS_BASED)
17777 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17778 linker. We need to add an IP clobber to allow setting
17779 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17780 is not needed since it's a fixed register. */
17781 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17782 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17786 /* Output a 'call' insn. */
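/* Illustrative example (pre-ARMv5 only, per the assertion below): a call
   through r3 comes out as something like
	mov	lr, pc
	bx	r3
   or, without interworking/ARMv4T support,
	mov	pc, r3  */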
17787 const char *
17788 output_call (rtx *operands)
17790 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17792 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17793 if (REGNO (operands[0]) == LR_REGNUM)
17795 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17796 output_asm_insn ("mov%?\t%0, %|lr", operands);
17799 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17801 if (TARGET_INTERWORK || arm_arch4t)
17802 output_asm_insn ("bx%?\t%0", operands);
17803 else
17804 output_asm_insn ("mov%?\t%|pc, %0", operands);
17806 return "";
17809 /* Output a 'call' insn whose target is a reference in memory. This is
17810 disabled for ARMv5 and later, where we prefer a blx instead because
17811 calling through memory otherwise has a significant performance overhead. */
17812 const char *
17813 output_call_mem (rtx *operands)
17815 gcc_assert (!arm_arch5);
17816 if (TARGET_INTERWORK)
17818 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17819 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17820 output_asm_insn ("bx%?\t%|ip", operands);
17822 else if (regno_use_in (LR_REGNUM, operands[0]))
17824 /* LR is used in the memory address. We load the address in the
17825 first instruction. It's safe to use IP as the target of the
17826 load since the call will kill it anyway. */
17827 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17828 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17829 if (arm_arch4t)
17830 output_asm_insn ("bx%?\t%|ip", operands);
17831 else
17832 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17834 else
17836 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17837 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17840 return "";
17844 /* Output a move from arm registers to arm registers of a long double
17845 OPERANDS[0] is the destination.
17846 OPERANDS[1] is the source. */
17847 const char *
17848 output_mov_long_double_arm_from_arm (rtx *operands)
17850 /* We have to be careful here because the two might overlap. */
17851 int dest_start = REGNO (operands[0]);
17852 int src_start = REGNO (operands[1]);
17853 rtx ops[2];
17854 int i;
17856 if (dest_start < src_start)
17858 for (i = 0; i < 3; i++)
17860 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17861 ops[1] = gen_rtx_REG (SImode, src_start + i);
17862 output_asm_insn ("mov%?\t%0, %1", ops);
17865 else
17867 for (i = 2; i >= 0; i--)
17869 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17870 ops[1] = gen_rtx_REG (SImode, src_start + i);
17871 output_asm_insn ("mov%?\t%0, %1", ops);
17875 return "";
17878 void
17879 arm_emit_movpair (rtx dest, rtx src)
17881 /* If the src is an immediate, simplify it. */
17882 if (CONST_INT_P (src))
17884 HOST_WIDE_INT val = INTVAL (src);
17885 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17886 if ((val >> 16) & 0x0000ffff)
17887 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17888 GEN_INT (16)),
17889 GEN_INT ((val >> 16) & 0x0000ffff));
17890 return;
17892 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17893 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17896 /* Output a move between double words. It must be REG<-MEM
17897 or MEM<-REG. */
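/* Illustrative examples for the simple register-address load: with
   TARGET_LDRD this prints something like
	ldrd	r0, [r4]
   and otherwise
	ldmia	r4, {r0-r1}  */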
17898 const char *
17899 output_move_double (rtx *operands, bool emit, int *count)
17901 enum rtx_code code0 = GET_CODE (operands[0]);
17902 enum rtx_code code1 = GET_CODE (operands[1]);
17903 rtx otherops[3];
17904 if (count)
17905 *count = 1;
17907 /* The only case when this might happen is when
17908 you are looking at the length of a DImode instruction
17909 that has an invalid constant in it. */
17910 if (code0 == REG && code1 != MEM)
17912 gcc_assert (!emit);
17913 *count = 2;
17914 return "";
17917 if (code0 == REG)
17919 unsigned int reg0 = REGNO (operands[0]);
17921 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17923 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17925 switch (GET_CODE (XEXP (operands[1], 0)))
17927 case REG:
17929 if (emit)
17931 if (TARGET_LDRD
17932 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17933 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17934 else
17935 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17937 break;
17939 case PRE_INC:
17940 gcc_assert (TARGET_LDRD);
17941 if (emit)
17942 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17943 break;
17945 case PRE_DEC:
17946 if (emit)
17948 if (TARGET_LDRD)
17949 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17950 else
17951 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17953 break;
17955 case POST_INC:
17956 if (emit)
17958 if (TARGET_LDRD)
17959 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17960 else
17961 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17963 break;
17965 case POST_DEC:
17966 gcc_assert (TARGET_LDRD);
17967 if (emit)
17968 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17969 break;
17971 case PRE_MODIFY:
17972 case POST_MODIFY:
17973 /* Autoincrement addressing modes should never have overlapping
17974 base and destination registers, and overlapping index registers
17975 are already prohibited, so this doesn't need to worry about
17976 fix_cm3_ldrd. */
17977 otherops[0] = operands[0];
17978 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17979 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17981 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17983 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17985 /* Registers overlap so split out the increment. */
17986 if (emit)
17988 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17989 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17991 if (count)
17992 *count = 2;
17994 else
17996 /* Use a single insn if we can.
17997 FIXME: IWMMXT allows offsets larger than ldrd can
17998 handle, fix these up with a pair of ldr. */
17999 if (TARGET_THUMB2
18000 || !CONST_INT_P (otherops[2])
18001 || (INTVAL (otherops[2]) > -256
18002 && INTVAL (otherops[2]) < 256))
18004 if (emit)
18005 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18007 else
18009 if (emit)
18011 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18012 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18014 if (count)
18015 *count = 2;
18020 else
18022 /* Use a single insn if we can.
18023 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18024 fix these up with a pair of ldr. */
18025 if (TARGET_THUMB2
18026 || !CONST_INT_P (otherops[2])
18027 || (INTVAL (otherops[2]) > -256
18028 && INTVAL (otherops[2]) < 256))
18030 if (emit)
18031 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18033 else
18035 if (emit)
18037 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18038 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18040 if (count)
18041 *count = 2;
18044 break;
18046 case LABEL_REF:
18047 case CONST:
18048 /* We might be able to use ldrd %0, %1 here. However the range is
18049 different to ldr/adr, and it is broken on some ARMv7-M
18050 implementations. */
18051 /* Use the second register of the pair to avoid problematic
18052 overlap. */
18053 otherops[1] = operands[1];
18054 if (emit)
18055 output_asm_insn ("adr%?\t%0, %1", otherops);
18056 operands[1] = otherops[0];
18057 if (emit)
18059 if (TARGET_LDRD)
18060 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18061 else
18062 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18065 if (count)
18066 *count = 2;
18067 break;
18069 /* ??? This needs checking for thumb2. */
18070 default:
18071 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18072 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18074 otherops[0] = operands[0];
18075 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18076 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18078 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18080 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18082 switch ((int) INTVAL (otherops[2]))
18084 case -8:
18085 if (emit)
18086 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18087 return "";
18088 case -4:
18089 if (TARGET_THUMB2)
18090 break;
18091 if (emit)
18092 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18093 return "";
18094 case 4:
18095 if (TARGET_THUMB2)
18096 break;
18097 if (emit)
18098 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18099 return "";
18102 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18103 operands[1] = otherops[0];
18104 if (TARGET_LDRD
18105 && (REG_P (otherops[2])
18106 || TARGET_THUMB2
18107 || (CONST_INT_P (otherops[2])
18108 && INTVAL (otherops[2]) > -256
18109 && INTVAL (otherops[2]) < 256)))
18111 if (reg_overlap_mentioned_p (operands[0],
18112 otherops[2]))
18114 rtx tmp;
18115 /* Swap base and index registers over to
18116 avoid a conflict. */
18117 tmp = otherops[1];
18118 otherops[1] = otherops[2];
18119 otherops[2] = tmp;
18121 /* If both registers conflict, it will usually
18122 have been fixed by a splitter. */
18123 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18124 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18126 if (emit)
18128 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18129 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18131 if (count)
18132 *count = 2;
18134 else
18136 otherops[0] = operands[0];
18137 if (emit)
18138 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18140 return "";
18143 if (CONST_INT_P (otherops[2]))
18145 if (emit)
18147 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18148 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18149 else
18150 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18153 else
18155 if (emit)
18156 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18159 else
18161 if (emit)
18162 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18165 if (count)
18166 *count = 2;
18168 if (TARGET_LDRD)
18169 return "ldr%(d%)\t%0, [%1]";
18171 return "ldm%(ia%)\t%1, %M0";
18173 else
18175 otherops[1] = adjust_address (operands[1], SImode, 4);
18176 /* Take care of overlapping base/data reg. */
18177 if (reg_mentioned_p (operands[0], operands[1]))
18179 if (emit)
18181 output_asm_insn ("ldr%?\t%0, %1", otherops);
18182 output_asm_insn ("ldr%?\t%0, %1", operands);
18184 if (count)
18185 *count = 2;
18188 else
18190 if (emit)
18192 output_asm_insn ("ldr%?\t%0, %1", operands);
18193 output_asm_insn ("ldr%?\t%0, %1", otherops);
18195 if (count)
18196 *count = 2;
18201 else
18203 /* Constraints should ensure this. */
18204 gcc_assert (code0 == MEM && code1 == REG);
18205 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18206 || (TARGET_ARM && TARGET_LDRD));
18208 switch (GET_CODE (XEXP (operands[0], 0)))
18210 case REG:
18211 if (emit)
18213 if (TARGET_LDRD)
18214 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18215 else
18216 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18218 break;
18220 case PRE_INC:
18221 gcc_assert (TARGET_LDRD);
18222 if (emit)
18223 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18224 break;
18226 case PRE_DEC:
18227 if (emit)
18229 if (TARGET_LDRD)
18230 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18231 else
18232 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18234 break;
18236 case POST_INC:
18237 if (emit)
18239 if (TARGET_LDRD)
18240 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18241 else
18242 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18244 break;
18246 case POST_DEC:
18247 gcc_assert (TARGET_LDRD);
18248 if (emit)
18249 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18250 break;
18252 case PRE_MODIFY:
18253 case POST_MODIFY:
18254 otherops[0] = operands[1];
18255 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18256 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18258 /* IWMMXT allows offsets larger than ldrd can handle,
18259 fix these up with a pair of ldr. */
18260 if (!TARGET_THUMB2
18261 && CONST_INT_P (otherops[2])
18262 && (INTVAL(otherops[2]) <= -256
18263 || INTVAL(otherops[2]) >= 256))
18265 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18267 if (emit)
18269 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18270 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18272 if (count)
18273 *count = 2;
18275 else
18277 if (emit)
18279 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18280 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18282 if (count)
18283 *count = 2;
18286 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18288 if (emit)
18289 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18291 else
18293 if (emit)
18294 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18296 break;
18298 case PLUS:
18299 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18300 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18302 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18304 case -8:
18305 if (emit)
18306 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18307 return "";
18309 case -4:
18310 if (TARGET_THUMB2)
18311 break;
18312 if (emit)
18313 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18314 return "";
18316 case 4:
18317 if (TARGET_THUMB2)
18318 break;
18319 if (emit)
18320 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18321 return "";
18324 if (TARGET_LDRD
18325 && (REG_P (otherops[2])
18326 || TARGET_THUMB2
18327 || (CONST_INT_P (otherops[2])
18328 && INTVAL (otherops[2]) > -256
18329 && INTVAL (otherops[2]) < 256)))
18331 otherops[0] = operands[1];
18332 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18333 if (emit)
18334 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18335 return "";
18337 /* Fall through */
18339 default:
18340 otherops[0] = adjust_address (operands[0], SImode, 4);
18341 otherops[1] = operands[1];
18342 if (emit)
18344 output_asm_insn ("str%?\t%1, %0", operands);
18345 output_asm_insn ("str%?\t%H1, %0", otherops);
18347 if (count)
18348 *count = 2;
18352 return "";
18355 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18356 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
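/* Illustrative example: a quad-word load from a plain register address
   prints as something like
	ldmia	r4, {r0-r3}
   with the matching stmia form used for stores.  */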
18358 const char *
18359 output_move_quad (rtx *operands)
18361 if (REG_P (operands[0]))
18363 /* Load, or reg->reg move. */
18365 if (MEM_P (operands[1]))
18367 switch (GET_CODE (XEXP (operands[1], 0)))
18369 case REG:
18370 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18371 break;
18373 case LABEL_REF:
18374 case CONST:
18375 output_asm_insn ("adr%?\t%0, %1", operands);
18376 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18377 break;
18379 default:
18380 gcc_unreachable ();
18383 else
18385 rtx ops[2];
18386 int dest, src, i;
18388 gcc_assert (REG_P (operands[1]));
18390 dest = REGNO (operands[0]);
18391 src = REGNO (operands[1]);
18393 /* This seems pretty dumb, but hopefully GCC won't try to do it
18394 very often. */
18395 if (dest < src)
18396 for (i = 0; i < 4; i++)
18398 ops[0] = gen_rtx_REG (SImode, dest + i);
18399 ops[1] = gen_rtx_REG (SImode, src + i);
18400 output_asm_insn ("mov%?\t%0, %1", ops);
18402 else
18403 for (i = 3; i >= 0; i--)
18405 ops[0] = gen_rtx_REG (SImode, dest + i);
18406 ops[1] = gen_rtx_REG (SImode, src + i);
18407 output_asm_insn ("mov%?\t%0, %1", ops);
18411 else
18413 gcc_assert (MEM_P (operands[0]));
18414 gcc_assert (REG_P (operands[1]));
18415 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18417 switch (GET_CODE (XEXP (operands[0], 0)))
18419 case REG:
18420 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18421 break;
18423 default:
18424 gcc_unreachable ();
18428 return "";
18431 /* Output a VFP load or store instruction. */
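/* Illustrative examples: a DFmode load with a plain address prints as
	vldr.64	d8, [r4]
   while a PRE_DEC store uses the multiple form, e.g.
	vstmdb.64	r4!, {d8}  */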
18433 const char *
18434 output_move_vfp (rtx *operands)
18436 rtx reg, mem, addr, ops[2];
18437 int load = REG_P (operands[0]);
18438 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18439 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18440 const char *templ;
18441 char buff[50];
18442 machine_mode mode;
18444 reg = operands[!load];
18445 mem = operands[load];
18447 mode = GET_MODE (reg);
18449 gcc_assert (REG_P (reg));
18450 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18451 gcc_assert (mode == SFmode
18452 || mode == DFmode
18453 || mode == SImode
18454 || mode == DImode
18455 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18456 gcc_assert (MEM_P (mem));
18458 addr = XEXP (mem, 0);
18460 switch (GET_CODE (addr))
18462 case PRE_DEC:
18463 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18464 ops[0] = XEXP (addr, 0);
18465 ops[1] = reg;
18466 break;
18468 case POST_INC:
18469 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18470 ops[0] = XEXP (addr, 0);
18471 ops[1] = reg;
18472 break;
18474 default:
18475 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18476 ops[0] = reg;
18477 ops[1] = mem;
18478 break;
18481 sprintf (buff, templ,
18482 load ? "ld" : "st",
18483 dp ? "64" : "32",
18484 dp ? "P" : "",
18485 integer_p ? "\t%@ int" : "");
18486 output_asm_insn (buff, ops);
18488 return "";
18491 /* Output a Neon double-word or quad-word load or store, or a load
18492 or store for larger structure modes.
18494 WARNING: The ordering of elements is weird in big-endian mode,
18495 because the EABI requires that vectors stored in memory appear
18496 as though they were stored by a VSTM instruction.
18497 GCC RTL defines element ordering based on in-memory order.
18498 This can be different from the architectural ordering of elements
18499 within a NEON register. The intrinsics defined in arm_neon.h use the
18500 NEON register element ordering, not the GCC RTL element ordering.
18502 For example, the in-memory ordering of a big-endian quadword
18503 vector with 16-bit elements when stored from register pair {d0,d1}
18504 will be (lowest address first, d0[N] is NEON register element N):
18506 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18508 When necessary, quadword registers (dN, dN+1) are moved to ARM
18509 registers from rN in the order:
18511 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18513 So that STM/LDM can be used on vectors in ARM registers, and the
18514 same memory layout will result as if VSTM/VLDM were used.
18516 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18517 possible, which allows use of appropriate alignment tags.
18518 Note that the choice of "64" is independent of the actual vector
18519 element size; this size simply ensures that the behavior is
18520 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18522 Due to limitations of those instructions, use of VST1.64/VLD1.64
18523 is not possible if:
18524 - the address contains PRE_DEC, or
18525 - the mode refers to more than 4 double-word registers
18527 In those cases, it would be possible to replace VSTM/VLDM by a
18528 sequence of instructions; this is not currently implemented since
18529 this is not certain to actually improve performance. */
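/* Illustrative examples (exact operand syntax depends on the output
   modifiers): a quad-word load from a plain register address comes out
   roughly as
	vld1.64	{d0, d1}, [r0:64]
   while modes needing more than four D registers fall back to
	vldmia	r0, {d0-d5}  */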
18531 const char *
18532 output_move_neon (rtx *operands)
18534 rtx reg, mem, addr, ops[2];
18535 int regno, nregs, load = REG_P (operands[0]);
18536 const char *templ;
18537 char buff[50];
18538 machine_mode mode;
18540 reg = operands[!load];
18541 mem = operands[load];
18543 mode = GET_MODE (reg);
18545 gcc_assert (REG_P (reg));
18546 regno = REGNO (reg);
18547 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18548 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18549 || NEON_REGNO_OK_FOR_QUAD (regno));
18550 gcc_assert (VALID_NEON_DREG_MODE (mode)
18551 || VALID_NEON_QREG_MODE (mode)
18552 || VALID_NEON_STRUCT_MODE (mode));
18553 gcc_assert (MEM_P (mem));
18555 addr = XEXP (mem, 0);
18557 /* Strip off const from addresses like (const (plus (...))). */
18558 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18559 addr = XEXP (addr, 0);
18561 switch (GET_CODE (addr))
18563 case POST_INC:
18564 /* We have to use vldm / vstm for too-large modes. */
18565 if (nregs > 4)
18567 templ = "v%smia%%?\t%%0!, %%h1";
18568 ops[0] = XEXP (addr, 0);
18570 else
18572 templ = "v%s1.64\t%%h1, %%A0";
18573 ops[0] = mem;
18575 ops[1] = reg;
18576 break;
18578 case PRE_DEC:
18579 /* We have to use vldm / vstm in this case, since there is no
18580 pre-decrement form of the vld1 / vst1 instructions. */
18581 templ = "v%smdb%%?\t%%0!, %%h1";
18582 ops[0] = XEXP (addr, 0);
18583 ops[1] = reg;
18584 break;
18586 case POST_MODIFY:
18587 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18588 gcc_unreachable ();
18590 case REG:
18591 /* We have to use vldm / vstm for too-large modes. */
18592 if (nregs > 1)
18594 if (nregs > 4)
18595 templ = "v%smia%%?\t%%m0, %%h1";
18596 else
18597 templ = "v%s1.64\t%%h1, %%A0";
18599 ops[0] = mem;
18600 ops[1] = reg;
18601 break;
18603 /* Fall through. */
18604 case LABEL_REF:
18605 case PLUS:
18607 int i;
18608 int overlap = -1;
18609 for (i = 0; i < nregs; i++)
18611 /* We're only using DImode here because it's a convenient size. */
18612 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18613 ops[1] = adjust_address (mem, DImode, 8 * i);
18614 if (reg_overlap_mentioned_p (ops[0], mem))
18616 gcc_assert (overlap == -1);
18617 overlap = i;
18619 else
18621 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18622 output_asm_insn (buff, ops);
18625 if (overlap != -1)
18627 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18628 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18629 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18630 output_asm_insn (buff, ops);
18633 return "";
18636 default:
18637 gcc_unreachable ();
18640 sprintf (buff, templ, load ? "ld" : "st");
18641 output_asm_insn (buff, ops);
18643 return "";
18646 /* Compute and return the length of neon_mov<mode>, where <mode> is
18647 one of VSTRUCT modes: EI, OI, CI or XI. */
18649 arm_attr_length_move_neon (rtx_insn *insn)
18651 rtx reg, mem, addr;
18652 int load;
18653 machine_mode mode;
18655 extract_insn_cached (insn);
18657 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18659 mode = GET_MODE (recog_data.operand[0]);
18660 switch (mode)
18662 case EImode:
18663 case OImode:
18664 return 8;
18665 case CImode:
18666 return 12;
18667 case XImode:
18668 return 16;
18669 default:
18670 gcc_unreachable ();
18674 load = REG_P (recog_data.operand[0]);
18675 reg = recog_data.operand[!load];
18676 mem = recog_data.operand[load];
18678 gcc_assert (MEM_P (mem));
18680 mode = GET_MODE (reg);
18681 addr = XEXP (mem, 0);
18683 /* Strip off const from addresses like (const (plus (...))). */
18684 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18685 addr = XEXP (addr, 0);
18687 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18689 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18690 return insns * 4;
18692 else
18693 return 4;
18696 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18697 return zero. */
18700 arm_address_offset_is_imm (rtx_insn *insn)
18702 rtx mem, addr;
18704 extract_insn_cached (insn);
18706 if (REG_P (recog_data.operand[0]))
18707 return 0;
18709 mem = recog_data.operand[0];
18711 gcc_assert (MEM_P (mem));
18713 addr = XEXP (mem, 0);
18715 if (REG_P (addr)
18716 || (GET_CODE (addr) == PLUS
18717 && REG_P (XEXP (addr, 0))
18718 && CONST_INT_P (XEXP (addr, 1))))
18719 return 1;
18720 else
18721 return 0;
18724 /* Output an ADD r, s, #n where n may be too big for one instruction.
18725 If adding zero to one register, output nothing. */
18726 const char *
18727 output_add_immediate (rtx *operands)
18729 HOST_WIDE_INT n = INTVAL (operands[2]);
18731 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18733 if (n < 0)
18734 output_multi_immediate (operands,
18735 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18736 -n);
18737 else
18738 output_multi_immediate (operands,
18739 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18743 return "";
18746 /* Output a multiple immediate operation.
18747 OPERANDS is the vector of operands referred to in the output patterns.
18748 INSTR1 is the output pattern to use for the first constant.
18749 INSTR2 is the output pattern to use for subsequent constants.
18750 IMMED_OP is the index of the constant slot in OPERANDS.
18751 N is the constant value. */
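/* Worked example (illustrative): the constant 0x10f00 cannot be encoded
   as a single ARM immediate, so it is peeled into 8-bit chunks on even
   bit boundaries and an add via output_add_immediate emits
	add	r0, r1, #3840
	add	r0, r0, #65536
   i.e. 0xf00 followed by 0x10000.  */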
18752 static const char *
18753 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18754 int immed_op, HOST_WIDE_INT n)
18756 #if HOST_BITS_PER_WIDE_INT > 32
18757 n &= 0xffffffff;
18758 #endif
18760 if (n == 0)
18762 /* Quick and easy output. */
18763 operands[immed_op] = const0_rtx;
18764 output_asm_insn (instr1, operands);
18766 else
18768 int i;
18769 const char * instr = instr1;
18771 /* Note that n is never zero here (which would give no output). */
18772 for (i = 0; i < 32; i += 2)
18774 if (n & (3 << i))
18776 operands[immed_op] = GEN_INT (n & (255 << i));
18777 output_asm_insn (instr, operands);
18778 instr = instr2;
18779 i += 6;
18784 return "";
18787 /* Return the name of a shifter operation. */
18788 static const char *
18789 arm_shift_nmem(enum rtx_code code)
18791 switch (code)
18793 case ASHIFT:
18794 return ARM_LSL_NAME;
18796 case ASHIFTRT:
18797 return "asr";
18799 case LSHIFTRT:
18800 return "lsr";
18802 case ROTATERT:
18803 return "ror";
18805 default:
18806 abort();
18810 /* Return the appropriate ARM instruction for the operation code.
18811 The returned result should not be overwritten. OP is the rtx of the
18812 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18813 was shifted. */
18814 const char *
18815 arithmetic_instr (rtx op, int shift_first_arg)
18817 switch (GET_CODE (op))
18819 case PLUS:
18820 return "add";
18822 case MINUS:
18823 return shift_first_arg ? "rsb" : "sub";
18825 case IOR:
18826 return "orr";
18828 case XOR:
18829 return "eor";
18831 case AND:
18832 return "and";
18834 case ASHIFT:
18835 case ASHIFTRT:
18836 case LSHIFTRT:
18837 case ROTATERT:
18838 return arm_shift_nmem(GET_CODE(op));
18840 default:
18841 gcc_unreachable ();
18845 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18846 for the operation code. The returned result should not be overwritten.
18847 OP is the rtx code of the shift.
18848 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18849 constant amount if the shift is by a constant. */
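/* For instance, (mult x 8) is treated as a shift: the result is
   ARM_LSL_NAME ("lsl") with *AMOUNTP set to 3 (illustrative).  */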
18850 static const char *
18851 shift_op (rtx op, HOST_WIDE_INT *amountp)
18853 const char * mnem;
18854 enum rtx_code code = GET_CODE (op);
18856 switch (code)
18858 case ROTATE:
18859 if (!CONST_INT_P (XEXP (op, 1)))
18861 output_operand_lossage ("invalid shift operand");
18862 return NULL;
18865 code = ROTATERT;
18866 *amountp = 32 - INTVAL (XEXP (op, 1));
18867 mnem = "ror";
18868 break;
18870 case ASHIFT:
18871 case ASHIFTRT:
18872 case LSHIFTRT:
18873 case ROTATERT:
18874 mnem = arm_shift_nmem(code);
18875 if (CONST_INT_P (XEXP (op, 1)))
18877 *amountp = INTVAL (XEXP (op, 1));
18879 else if (REG_P (XEXP (op, 1)))
18881 *amountp = -1;
18882 return mnem;
18884 else
18886 output_operand_lossage ("invalid shift operand");
18887 return NULL;
18889 break;
18891 case MULT:
18892 /* We never have to worry about the amount being other than a
18893 power of 2, since this case can never be reloaded from a reg. */
18894 if (!CONST_INT_P (XEXP (op, 1)))
18896 output_operand_lossage ("invalid shift operand");
18897 return NULL;
18900 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18902 /* Amount must be a power of two. */
18903 if (*amountp & (*amountp - 1))
18905 output_operand_lossage ("invalid shift operand");
18906 return NULL;
18909 *amountp = int_log2 (*amountp);
18910 return ARM_LSL_NAME;
18912 default:
18913 output_operand_lossage ("invalid shift operand");
18914 return NULL;
18917 /* This is not 100% correct, but follows from the desire to merge
18918 multiplication by a power of 2 with the recognizer for a
18919 shift. >=32 is not a valid shift for "lsl", so we must try to
18920 output a shift that produces the correct arithmetical result.
18921 Using lsr #32 is identical except for the fact that the carry bit
18922 is not set correctly if we set the flags; but we never use the
18923 carry bit from such an operation, so we can ignore that. */
18924 if (code == ROTATERT)
18925 /* Rotate is just modulo 32. */
18926 *amountp &= 31;
18927 else if (*amountp != (*amountp & 31))
18929 if (code == ASHIFT)
18930 mnem = "lsr";
18931 *amountp = 32;
18934 /* Shifts of 0 are no-ops. */
18935 if (*amountp == 0)
18936 return NULL;
18938 return mnem;
18941 /* Obtain the shift from the POWER of two. */
18943 static HOST_WIDE_INT
18944 int_log2 (HOST_WIDE_INT power)
18946 HOST_WIDE_INT shift = 0;
18948 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18950 gcc_assert (shift <= 31);
18951 shift++;
18954 return shift;
18957 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18958 because /bin/as is horribly restrictive. The judgement about
18959 whether or not each character is 'printable' (and can be output as
18960 is) or not (and must be printed with an octal escape) must be made
18961 with reference to the *host* character set -- the situation is
18962 similar to that discussed in the comments above pp_c_char in
18963 c-pretty-print.c. */
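/* Illustrative example: the four bytes 'h', 'i', '"', '\n' are emitted as
	.ascii	"hi\"\012"
   since the quote is printable but must be escaped, while the newline is
   not printable and so is written as an octal escape.  */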
18965 #define MAX_ASCII_LEN 51
18967 void
18968 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18970 int i;
18971 int len_so_far = 0;
18973 fputs ("\t.ascii\t\"", stream);
18975 for (i = 0; i < len; i++)
18977 int c = p[i];
18979 if (len_so_far >= MAX_ASCII_LEN)
18981 fputs ("\"\n\t.ascii\t\"", stream);
18982 len_so_far = 0;
18985 if (ISPRINT (c))
18987 if (c == '\\' || c == '\"')
18989 putc ('\\', stream);
18990 len_so_far++;
18992 putc (c, stream);
18993 len_so_far++;
18995 else
18997 fprintf (stream, "\\%03o", c);
18998 len_so_far += 4;
19002 fputs ("\"\n", stream);
19005 /* Compute the register save mask for registers 0 through 12
19006 inclusive. This code is used by arm_compute_save_reg_mask. */
19008 static unsigned long
19009 arm_compute_save_reg0_reg12_mask (void)
19011 unsigned long func_type = arm_current_func_type ();
19012 unsigned long save_reg_mask = 0;
19013 unsigned int reg;
19015 if (IS_INTERRUPT (func_type))
19017 unsigned int max_reg;
19018 /* Interrupt functions must not corrupt any registers,
19019 even call clobbered ones. If this is a leaf function
19020 we can just examine the registers used by the RTL, but
19021 otherwise we have to assume that whatever function is
19022 called might clobber anything, and so we have to save
19023 all the call-clobbered registers as well. */
19024 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19025 /* FIQ handlers have registers r8 - r12 banked, so
19026 we only need to check r0 - r7. Normal ISRs only
19027 bank r14, so we must check up to r12.
19028 r13 is the stack pointer which is always preserved,
19029 so we do not need to consider it here. */
19030 max_reg = 7;
19031 else
19032 max_reg = 12;
19034 for (reg = 0; reg <= max_reg; reg++)
19035 if (df_regs_ever_live_p (reg)
19036 || (! crtl->is_leaf && call_used_regs[reg]))
19037 save_reg_mask |= (1 << reg);
19039 /* Also save the pic base register if necessary. */
19040 if (flag_pic
19041 && !TARGET_SINGLE_PIC_BASE
19042 && arm_pic_register != INVALID_REGNUM
19043 && crtl->uses_pic_offset_table)
19044 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19046 else if (IS_VOLATILE(func_type))
19048 /* For noreturn functions we historically omitted register saves
19049 altogether. However this really messes up debugging. As a
19050 compromise save just the frame pointers. Combined with the link
19051 register saved elsewhere this should be sufficient to get
19052 a backtrace. */
19053 if (frame_pointer_needed)
19054 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19055 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19056 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19057 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19058 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19060 else
19062 /* In the normal case we only need to save those registers
19063 which are call saved and which are used by this function. */
19064 for (reg = 0; reg <= 11; reg++)
19065 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19066 save_reg_mask |= (1 << reg);
19068 /* Handle the frame pointer as a special case. */
19069 if (frame_pointer_needed)
19070 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19072 /* If we aren't loading the PIC register,
19073 don't stack it even though it may be live. */
19074 if (flag_pic
19075 && !TARGET_SINGLE_PIC_BASE
19076 && arm_pic_register != INVALID_REGNUM
19077 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19078 || crtl->uses_pic_offset_table))
19079 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19081 /* The prologue will copy SP into R0, so save it. */
19082 if (IS_STACKALIGN (func_type))
19083 save_reg_mask |= 1;
19086 /* Save registers so the exception handler can modify them. */
19087 if (crtl->calls_eh_return)
19089 unsigned int i;
19091 for (i = 0; ; i++)
19093 reg = EH_RETURN_DATA_REGNO (i);
19094 if (reg == INVALID_REGNUM)
19095 break;
19096 save_reg_mask |= 1 << reg;
19100 return save_reg_mask;
19103 /* Return true if r3 is live at the start of the function. */
19105 static bool
19106 arm_r3_live_at_start_p (void)
19108 /* Just look at cfg info, which is still close enough to correct at this
19109 point. This gives false positives for broken functions that might use
19110 uninitialized data that happens to be allocated in r3, but who cares? */
19111 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19114 /* Compute the number of bytes used to store the static chain register on the
19115 stack, above the stack frame. We need to know this accurately to get the
19116 alignment of the rest of the stack frame correct. */
19118 static int
19119 arm_compute_static_chain_stack_bytes (void)
19121 /* See the defining assertion in arm_expand_prologue. */
19122 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19123 && IS_NESTED (arm_current_func_type ())
19124 && arm_r3_live_at_start_p ()
19125 && crtl->args.pretend_args_size == 0)
19126 return 4;
19128 return 0;
19131 /* Compute a bit mask of which registers need to be
19132 saved on the stack for the current function.
19133 This is used by arm_get_frame_offsets, which may add extra registers. */
19135 static unsigned long
19136 arm_compute_save_reg_mask (void)
19138 unsigned int save_reg_mask = 0;
19139 unsigned long func_type = arm_current_func_type ();
19140 unsigned int reg;
19142 if (IS_NAKED (func_type))
19143 /* This should never really happen. */
19144 return 0;
19146 /* If we are creating a stack frame, then we must save the frame pointer,
19147 IP (which will hold the old stack pointer), LR and the PC. */
19148 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19149 save_reg_mask |=
19150 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19151 | (1 << IP_REGNUM)
19152 | (1 << LR_REGNUM)
19153 | (1 << PC_REGNUM);
19155 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19157 /* Decide if we need to save the link register.
19158 Interrupt routines have their own banked link register,
19159 so they never need to save it.
19160 Otherwise if we do not use the link register we do not need to save
19161 it. If we are pushing other registers onto the stack however, we
19162 can save an instruction in the epilogue by pushing the link register
19163 now and then popping it back into the PC. This incurs extra memory
19164 accesses though, so we only do it when optimizing for size, and only
19165 if we know that we will not need a fancy return sequence. */
19166 if (df_regs_ever_live_p (LR_REGNUM)
19167 || (save_reg_mask
19168 && optimize_size
19169 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19170 && !crtl->calls_eh_return))
19171 save_reg_mask |= 1 << LR_REGNUM;
19173 if (cfun->machine->lr_save_eliminated)
19174 save_reg_mask &= ~ (1 << LR_REGNUM);
19176 if (TARGET_REALLY_IWMMXT
19177 && ((bit_count (save_reg_mask)
19178 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19179 arm_compute_static_chain_stack_bytes())
19180 ) % 2) != 0)
19182 /* The total number of registers that are going to be pushed
19183 onto the stack is odd. We need to ensure that the stack
19184 is 64-bit aligned before we start to save iWMMXt registers,
19185 and also before we start to create locals. (A local variable
19186 might be a double or long long which we will load/store using
19187 an iWMMXt instruction). Therefore we need to push another
19188 ARM register, so that the stack will be 64-bit aligned. We
19189 try to avoid using the arg registers (r0 - r3) as they might be
19190 used to pass values in a tail call. */
19191 for (reg = 4; reg <= 12; reg++)
19192 if ((save_reg_mask & (1 << reg)) == 0)
19193 break;
19195 if (reg <= 12)
19196 save_reg_mask |= (1 << reg);
19197 else
19199 cfun->machine->sibcall_blocked = 1;
19200 save_reg_mask |= (1 << 3);
19204 /* We may need to push an additional register for use initializing the
19205 PIC base register. */
19206 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19207 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19209 reg = thumb_find_work_register (1 << 4);
19210 if (!call_used_regs[reg])
19211 save_reg_mask |= (1 << reg);
19214 return save_reg_mask;
19218 /* Compute a bit mask of which registers need to be
19219 saved on the stack for the current function. */
19220 static unsigned long
19221 thumb1_compute_save_reg_mask (void)
19223 unsigned long mask;
19224 unsigned reg;
19226 mask = 0;
19227 for (reg = 0; reg < 12; reg ++)
19228 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19229 mask |= 1 << reg;
19231 if (flag_pic
19232 && !TARGET_SINGLE_PIC_BASE
19233 && arm_pic_register != INVALID_REGNUM
19234 && crtl->uses_pic_offset_table)
19235 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19237 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19238 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19239 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19241 /* LR will also be pushed if any lo regs are pushed. */
19242 if (mask & 0xff || thumb_force_lr_save ())
19243 mask |= (1 << LR_REGNUM);
19245 /* Make sure we have a low work register if we need one.
19246 We will need one if we are going to push a high register,
19247 but we are not currently intending to push a low register. */
19248 if ((mask & 0xff) == 0
19249 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19251 /* Use thumb_find_work_register to choose which register
19252 we will use. If the register is live then we will
19253 have to push it. Use LAST_LO_REGNUM as our fallback
19254 choice for the register to select. */
19255 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19256 /* Make sure the register returned by thumb_find_work_register is
19257 not part of the return value. */
19258 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19259 reg = LAST_LO_REGNUM;
19261 if (! call_used_regs[reg])
19262 mask |= 1 << reg;
19265 /* The 504 below is 8 bytes less than 512 because there are two possible
19266 alignment words. We can't tell here if they will be present or not so we
19267 have to play it safe and assume that they are. */
19268 if ((CALLER_INTERWORKING_SLOT_SIZE +
19269 ROUND_UP_WORD (get_frame_size ()) +
19270 crtl->outgoing_args_size) >= 504)
19272 /* This is the same as the code in thumb1_expand_prologue() which
19273 determines which register to use for stack decrement. */
19274 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19275 if (mask & (1 << reg))
19276 break;
19278 if (reg > LAST_LO_REGNUM)
19280 /* Make sure we have a register available for stack decrement. */
19281 mask |= 1 << LAST_LO_REGNUM;
19285 return mask;
19289 /* Return the number of bytes required to save VFP registers. */
19290 static int
19291 arm_get_vfp_saved_size (void)
19293 unsigned int regno;
19294 int count;
19295 int saved;
19297 saved = 0;
19298 /* Space for saved VFP registers. */
19299 if (TARGET_HARD_FLOAT && TARGET_VFP)
19301 count = 0;
19302 for (regno = FIRST_VFP_REGNUM;
19303 regno < LAST_VFP_REGNUM;
19304 regno += 2)
19306 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19307 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19309 if (count > 0)
19311 /* Work around the ARM10 VFPr1 bug. */
19312 if (count == 2 && !arm_arch6)
19313 count++;
19314 saved += count * 8;
19316 count = 0;
19318 else
19319 count++;
19321 if (count > 0)
19323 if (count == 2 && !arm_arch6)
19324 count++;
19325 saved += count * 8;
19328 return saved;
19332 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19333 everything bar the final return instruction. If simple_return is true,
19334 then do not output the epilogue, because it has already been emitted in RTL. */
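/* Illustrative examples: a frame that saved {r4, r5, lr} typically
   returns with something like
	pop	{r4, r5, pc}
   (or "ldmfd sp!, {r4, r5, pc}" in divided syntax), while a function
   with nothing saved falls back to
	bx	lr
   or "mov pc, lr" on targets without bx.  */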
19335 const char *
19336 output_return_instruction (rtx operand, bool really_return, bool reverse,
19337 bool simple_return)
19339 char conditional[10];
19340 char instr[100];
19341 unsigned reg;
19342 unsigned long live_regs_mask;
19343 unsigned long func_type;
19344 arm_stack_offsets *offsets;
19346 func_type = arm_current_func_type ();
19348 if (IS_NAKED (func_type))
19349 return "";
19351 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19353 /* If this function was declared non-returning, and we have
19354 found a tail call, then we have to trust that the called
19355 function won't return. */
19356 if (really_return)
19358 rtx ops[2];
19360 /* Otherwise, trap an attempted return by aborting. */
19361 ops[0] = operand;
19362 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19363 : "abort");
19364 assemble_external_libcall (ops[1]);
19365 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19368 return "";
19371 gcc_assert (!cfun->calls_alloca || really_return);
19373 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19375 cfun->machine->return_used_this_function = 1;
19377 offsets = arm_get_frame_offsets ();
19378 live_regs_mask = offsets->saved_regs_mask;
19380 if (!simple_return && live_regs_mask)
19382 const char * return_reg;
19384 /* If we do not have any special requirements for function exit
19385 (e.g. interworking) then we can load the return address
19386 directly into the PC. Otherwise we must load it into LR. */
19387 if (really_return
19388 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19389 return_reg = reg_names[PC_REGNUM];
19390 else
19391 return_reg = reg_names[LR_REGNUM];
19393 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19395 /* There are three possible reasons for the IP register
19396 being saved. 1) a stack frame was created, in which case
19397 IP contains the old stack pointer, or 2) an ISR routine
19398 corrupted it, or 3) it was saved to align the stack on
19399 iWMMXt. In case 1, restore IP into SP, otherwise just
19400 restore IP. */
19401 if (frame_pointer_needed)
19403 live_regs_mask &= ~ (1 << IP_REGNUM);
19404 live_regs_mask |= (1 << SP_REGNUM);
19406 else
19407 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19410 /* On some ARM architectures it is faster to use LDR rather than
19411 LDM to load a single register. On other architectures, the
19412 cost is the same. In 26 bit mode, or for exception handlers,
19413 we have to use LDM to load the PC so that the CPSR is also
19414 restored. */
19415 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19416 if (live_regs_mask == (1U << reg))
19417 break;
19419 if (reg <= LAST_ARM_REGNUM
19420 && (reg != LR_REGNUM
19421 || ! really_return
19422 || ! IS_INTERRUPT (func_type)))
19424 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19425 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19427 else
19429 char *p;
19430 int first = 1;
19432 /* Generate the load multiple instruction to restore the
19433 registers. Note we can get here, even if
19434 frame_pointer_needed is true, but only if sp already
19435 points to the base of the saved core registers. */
19436 if (live_regs_mask & (1 << SP_REGNUM))
19438 unsigned HOST_WIDE_INT stack_adjust;
19440 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19441 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19443 if (stack_adjust && arm_arch5 && TARGET_ARM)
19444 if (TARGET_UNIFIED_ASM)
19445 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19446 else
19447 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19448 else
19450 /* If we can't use ldmib (SA110 bug),
19451 then try to pop r3 instead. */
19452 if (stack_adjust)
19453 live_regs_mask |= 1 << 3;
19455 if (TARGET_UNIFIED_ASM)
19456 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19457 else
19458 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19461 else
19462 if (TARGET_UNIFIED_ASM)
19463 sprintf (instr, "pop%s\t{", conditional);
19464 else
19465 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19467 p = instr + strlen (instr);
19469 for (reg = 0; reg <= SP_REGNUM; reg++)
19470 if (live_regs_mask & (1 << reg))
19472 int l = strlen (reg_names[reg]);
19474 if (first)
19475 first = 0;
19476 else
19478 memcpy (p, ", ", 2);
19479 p += 2;
19482 memcpy (p, "%|", 2);
19483 memcpy (p + 2, reg_names[reg], l);
19484 p += l + 2;
19487 if (live_regs_mask & (1 << LR_REGNUM))
19489 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19490 /* If returning from an interrupt, restore the CPSR. */
19491 if (IS_INTERRUPT (func_type))
19492 strcat (p, "^");
19494 else
19495 strcpy (p, "}");
19498 output_asm_insn (instr, & operand);
19500 /* See if we need to generate an extra instruction to
19501 perform the actual function return. */
19502 if (really_return
19503 && func_type != ARM_FT_INTERWORKED
19504 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19506 /* The return has already been handled
19507 by loading the LR into the PC. */
19508 return "";
19512 if (really_return)
19514 switch ((int) ARM_FUNC_TYPE (func_type))
19516 case ARM_FT_ISR:
19517 case ARM_FT_FIQ:
19518 /* ??? This is wrong for unified assembly syntax. */
19519 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19520 break;
19522 case ARM_FT_INTERWORKED:
19523 sprintf (instr, "bx%s\t%%|lr", conditional);
19524 break;
19526 case ARM_FT_EXCEPTION:
19527 /* ??? This is wrong for unified assembly syntax. */
19528 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19529 break;
19531 default:
19532 /* Use bx if it's available. */
19533 if (arm_arch5 || arm_arch4t)
19534 sprintf (instr, "bx%s\t%%|lr", conditional);
19535 else
19536 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19537 break;
19540 output_asm_insn (instr, & operand);
19543 return "";
19546 /* Write the function name into the code section, directly preceding
19547 the function prologue.
19549 Code will be output similar to this:
19551 .ascii "arm_poke_function_name", 0
19552 .align
19554 .word 0xff000000 + (t1 - t0)
19555 arm_poke_function_name
19556 mov ip, sp
19557 stmfd sp!, {fp, ip, lr, pc}
19558 sub fp, ip, #4
19560 When performing a stack backtrace, code can inspect the value
19561 of 'pc' stored at 'fp' + 0. If the trace function then looks
19562 at location pc - 12 and the top 8 bits are set, then we know
19563 that there is a function name embedded immediately preceding this
19564 location, and that it has length ((pc[-3]) & ~0xff000000).
19566 We assume that pc is declared as a pointer to an unsigned long.
19568 It is of no benefit to output the function name if we are assembling
19569 a leaf function. These function types will not contain a stack
19570 backtrace structure, therefore it is not possible to determine the
19571 function name. */
19572 void
19573 arm_poke_function_name (FILE *stream, const char *name)
19575 unsigned long alignlength;
19576 unsigned long length;
19577 rtx x;
19579 length = strlen (name) + 1;
19580 alignlength = ROUND_UP_WORD (length);
19582 ASM_OUTPUT_ASCII (stream, name, length);
19583 ASM_OUTPUT_ALIGN (stream, 2);
19584 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19585 assemble_aligned_integer (UNITS_PER_WORD, x);
19588 /* Place some comments into the assembler stream
19589 describing the current function. */
19590 static void
19591 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19593 unsigned long func_type;
19595 /* ??? Do we want to print some of the below anyway? */
19596 if (TARGET_THUMB1)
19597 return;
19599 /* Sanity check. */
19600 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19602 func_type = arm_current_func_type ();
19604 switch ((int) ARM_FUNC_TYPE (func_type))
19606 default:
19607 case ARM_FT_NORMAL:
19608 break;
19609 case ARM_FT_INTERWORKED:
19610 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19611 break;
19612 case ARM_FT_ISR:
19613 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19614 break;
19615 case ARM_FT_FIQ:
19616 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19617 break;
19618 case ARM_FT_EXCEPTION:
19619 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19620 break;
19623 if (IS_NAKED (func_type))
19624 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19626 if (IS_VOLATILE (func_type))
19627 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19629 if (IS_NESTED (func_type))
19630 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19631 if (IS_STACKALIGN (func_type))
19632 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19634 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19635 crtl->args.size,
19636 crtl->args.pretend_args_size, frame_size);
19638 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19639 frame_pointer_needed,
19640 cfun->machine->uses_anonymous_args);
19642 if (cfun->machine->lr_save_eliminated)
19643 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19645 if (crtl->calls_eh_return)
19646 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19650 static void
19651 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19652 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19654 arm_stack_offsets *offsets;
19656 if (TARGET_THUMB1)
19658 int regno;
19660 /* Emit any call-via-reg trampolines that are needed for v4t support
19661 of call_reg and call_value_reg type insns. */
19662 for (regno = 0; regno < LR_REGNUM; regno++)
19664 rtx label = cfun->machine->call_via[regno];
19666 if (label != NULL)
19668 switch_to_section (function_section (current_function_decl));
19669 targetm.asm_out.internal_label (asm_out_file, "L",
19670 CODE_LABEL_NUMBER (label));
19671 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19675 /* ??? Probably not safe to set this here, since it assumes that a
19676 function will be emitted as assembly immediately after we generate
19677 RTL for it. This does not happen for inline functions. */
19678 cfun->machine->return_used_this_function = 0;
19680 else /* TARGET_32BIT */
19682 /* We need to take into account any stack-frame rounding. */
19683 offsets = arm_get_frame_offsets ();
19685 gcc_assert (!use_return_insn (FALSE, NULL)
19686 || (cfun->machine->return_used_this_function != 0)
19687 || offsets->saved_regs == offsets->outgoing_args
19688 || frame_pointer_needed);
19692 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19693 STR and STRD. If an even number of registers is being pushed, an
19694 STRD pattern is created for each register pair. If an
19695 odd number of registers is pushed, emit an initial STR followed by
19696 as many STRD instructions as are needed. This works best when the
19697 stack is initially 64-bit aligned (the normal case), since it
19698 ensures that each STRD is also 64-bit aligned. */
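/* For example (a sketch, not verified compiler output): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7, lr} (five registers), the
   expected Thumb-2 sequence is roughly

       str     r4, [sp, #-20]!     @ single STR allocates all 20 bytes
       strd    r5, r6, [sp, #4]
       strd    r7, lr, [sp, #12]

   which keeps each STRD 64-bit aligned when SP was aligned on entry.  */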
19699 static void
19700 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19702 int num_regs = 0;
19703 int i;
19704 int regno;
19705 rtx par = NULL_RTX;
19706 rtx dwarf = NULL_RTX;
19707 rtx tmp;
19708 bool first = true;
19710 num_regs = bit_count (saved_regs_mask);
19712 /* Must be at least one register to save, and can't save SP or PC. */
19713 gcc_assert (num_regs > 0 && num_regs <= 14);
19714 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19715 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19717 /* Create sequence for DWARF info. All the frame-related data for
19718 debugging is held in this wrapper. */
19719 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19721 /* Describe the stack adjustment. */
19722 tmp = gen_rtx_SET (VOIDmode,
19723 stack_pointer_rtx,
19724 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19725 RTX_FRAME_RELATED_P (tmp) = 1;
19726 XVECEXP (dwarf, 0, 0) = tmp;
19728 /* Find the first register. */
19729 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19732 i = 0;
19734 /* If there's an odd number of registers to push, start off by
19735 pushing a single register. This ensures that subsequent strd
19736 operations are dword aligned (assuming that SP was originally
19737 64-bit aligned). */
19738 if ((num_regs & 1) != 0)
19740 rtx reg, mem, insn;
19742 reg = gen_rtx_REG (SImode, regno);
19743 if (num_regs == 1)
19744 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19745 stack_pointer_rtx));
19746 else
19747 mem = gen_frame_mem (Pmode,
19748 gen_rtx_PRE_MODIFY
19749 (Pmode, stack_pointer_rtx,
19750 plus_constant (Pmode, stack_pointer_rtx,
19751 -4 * num_regs)));
19753 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19754 RTX_FRAME_RELATED_P (tmp) = 1;
19755 insn = emit_insn (tmp);
19756 RTX_FRAME_RELATED_P (insn) = 1;
19757 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19758 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19759 reg);
19760 RTX_FRAME_RELATED_P (tmp) = 1;
19761 i++;
19762 regno++;
19763 XVECEXP (dwarf, 0, i) = tmp;
19764 first = false;
19767 while (i < num_regs)
19768 if (saved_regs_mask & (1 << regno))
19770 rtx reg1, reg2, mem1, mem2;
19771 rtx tmp0, tmp1, tmp2;
19772 int regno2;
19774 /* Find the register to pair with this one. */
19775 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19776 regno2++)
19779 reg1 = gen_rtx_REG (SImode, regno);
19780 reg2 = gen_rtx_REG (SImode, regno2);
19782 if (first)
19784 rtx insn;
19786 first = false;
19787 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19788 stack_pointer_rtx,
19789 -4 * num_regs));
19790 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19791 stack_pointer_rtx,
19792 -4 * (num_regs - 1)));
19793 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19794 plus_constant (Pmode, stack_pointer_rtx,
19795 -4 * (num_regs)));
19796 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19797 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19798 RTX_FRAME_RELATED_P (tmp0) = 1;
19799 RTX_FRAME_RELATED_P (tmp1) = 1;
19800 RTX_FRAME_RELATED_P (tmp2) = 1;
19801 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19802 XVECEXP (par, 0, 0) = tmp0;
19803 XVECEXP (par, 0, 1) = tmp1;
19804 XVECEXP (par, 0, 2) = tmp2;
19805 insn = emit_insn (par);
19806 RTX_FRAME_RELATED_P (insn) = 1;
19807 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19809 else
19811 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19812 stack_pointer_rtx,
19813 4 * i));
19814 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19815 stack_pointer_rtx,
19816 4 * (i + 1)));
19817 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19818 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19819 RTX_FRAME_RELATED_P (tmp1) = 1;
19820 RTX_FRAME_RELATED_P (tmp2) = 1;
19821 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19822 XVECEXP (par, 0, 0) = tmp1;
19823 XVECEXP (par, 0, 1) = tmp2;
19824 emit_insn (par);
19827 /* Create unwind information. This is an approximation. */
19828 tmp1 = gen_rtx_SET (VOIDmode,
19829 gen_frame_mem (Pmode,
19830 plus_constant (Pmode,
19831 stack_pointer_rtx,
19832 4 * i)),
19833 reg1);
19834 tmp2 = gen_rtx_SET (VOIDmode,
19835 gen_frame_mem (Pmode,
19836 plus_constant (Pmode,
19837 stack_pointer_rtx,
19838 4 * (i + 1))),
19839 reg2);
19841 RTX_FRAME_RELATED_P (tmp1) = 1;
19842 RTX_FRAME_RELATED_P (tmp2) = 1;
19843 XVECEXP (dwarf, 0, i + 1) = tmp1;
19844 XVECEXP (dwarf, 0, i + 2) = tmp2;
19845 i += 2;
19846 regno = regno2 + 1;
19848 else
19849 regno++;
19851 return;
19854 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19855 whenever possible, otherwise it emits single-word stores. The first store
19856 also allocates stack space for all saved registers, using writeback with
19857 pre-indexed addressing mode. All other stores use offset addressing. If no STRD
19858 can be emitted, this function emits a sequence of single-word stores,
19859 and not an STM as before, because single-word stores give more freedom
19860 for scheduling and can be turned into an STM by peephole optimizations. */
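/* Illustrative sketch (assumed output, not verified): for SAVED_REGS_MASK
   covering {r4, r5, r6, r7} in ARM mode the intended sequence is roughly

       strd    r4, r5, [sp, #-16]!  @ first store allocates all 16 bytes
       strd    r6, r7, [sp, #8]

   because r4/r5 and r6/r7 are consecutive even/odd pairs.  A mask such as
   {r4, r6, r7} would instead produce a single STR for r4 followed by one
   STRD for r6/r7.  */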
19861 static void
19862 arm_emit_strd_push (unsigned long saved_regs_mask)
19864 int num_regs = 0;
19865 int i, j, dwarf_index = 0;
19866 int offset = 0;
19867 rtx dwarf = NULL_RTX;
19868 rtx insn = NULL_RTX;
19869 rtx tmp, mem;
19871 /* TODO: More efficient code can be emitted by changing the
19872 layout, e.g., first push all pairs that can use STRD to keep the
19873 stack aligned, and then push all other registers. */
19874 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19875 if (saved_regs_mask & (1 << i))
19876 num_regs++;
19878 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19879 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19880 gcc_assert (num_regs > 0);
19882 /* Create sequence for DWARF info. */
19883 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19885 /* For dwarf info, we generate explicit stack update. */
19886 tmp = gen_rtx_SET (VOIDmode,
19887 stack_pointer_rtx,
19888 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19889 RTX_FRAME_RELATED_P (tmp) = 1;
19890 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19892 /* Save registers. */
19893 offset = - 4 * num_regs;
19894 j = 0;
19895 while (j <= LAST_ARM_REGNUM)
19896 if (saved_regs_mask & (1 << j))
19898 if ((j % 2 == 0)
19899 && (saved_regs_mask & (1 << (j + 1))))
19901 /* Current register and next register form a register pair for
19902 which STRD can be generated. */
19903 if (offset < 0)
19905 /* Allocate stack space for all saved registers. */
19906 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19907 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19908 mem = gen_frame_mem (DImode, tmp);
19909 offset = 0;
19911 else if (offset > 0)
19912 mem = gen_frame_mem (DImode,
19913 plus_constant (Pmode,
19914 stack_pointer_rtx,
19915 offset));
19916 else
19917 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19919 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19920 RTX_FRAME_RELATED_P (tmp) = 1;
19921 tmp = emit_insn (tmp);
19923 /* Record the first store insn. */
19924 if (dwarf_index == 1)
19925 insn = tmp;
19927 /* Generate dwarf info. */
19928 mem = gen_frame_mem (SImode,
19929 plus_constant (Pmode,
19930 stack_pointer_rtx,
19931 offset));
19932 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19933 RTX_FRAME_RELATED_P (tmp) = 1;
19934 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19936 mem = gen_frame_mem (SImode,
19937 plus_constant (Pmode,
19938 stack_pointer_rtx,
19939 offset + 4));
19940 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19941 RTX_FRAME_RELATED_P (tmp) = 1;
19942 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19944 offset += 8;
19945 j += 2;
19947 else
19949 /* Emit a single word store. */
19950 if (offset < 0)
19952 /* Allocate stack space for all saved registers. */
19953 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19954 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19955 mem = gen_frame_mem (SImode, tmp);
19956 offset = 0;
19958 else if (offset > 0)
19959 mem = gen_frame_mem (SImode,
19960 plus_constant (Pmode,
19961 stack_pointer_rtx,
19962 offset));
19963 else
19964 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19966 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19967 RTX_FRAME_RELATED_P (tmp) = 1;
19968 tmp = emit_insn (tmp);
19970 /* Record the first store insn. */
19971 if (dwarf_index == 1)
19972 insn = tmp;
19974 /* Generate dwarf info. */
19975 mem = gen_frame_mem (SImode,
19976 plus_constant(Pmode,
19977 stack_pointer_rtx,
19978 offset));
19979 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19980 RTX_FRAME_RELATED_P (tmp) = 1;
19981 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19983 offset += 4;
19984 j += 1;
19987 else
19988 j++;
19990 /* Attach dwarf info to the first insn we generate. */
19991 gcc_assert (insn != NULL_RTX);
19992 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19993 RTX_FRAME_RELATED_P (insn) = 1;
19996 /* Generate and emit an insn that we will recognize as a push_multi.
19997 Unfortunately, since this insn does not reflect very well the actual
19998 semantics of the operation, we need to annotate the insn for the benefit
19999 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20000 MASK for registers that should be annotated for DWARF2 frame unwind
20001 information. */
20002 static rtx
20003 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20005 int num_regs = 0;
20006 int num_dwarf_regs = 0;
20007 int i, j;
20008 rtx par;
20009 rtx dwarf;
20010 int dwarf_par_index;
20011 rtx tmp, reg;
20013 /* We don't record the PC in the dwarf frame information. */
20014 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20016 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20018 if (mask & (1 << i))
20019 num_regs++;
20020 if (dwarf_regs_mask & (1 << i))
20021 num_dwarf_regs++;
20024 gcc_assert (num_regs && num_regs <= 16);
20025 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20027 /* For the body of the insn we are going to generate an UNSPEC in
20028 parallel with several USEs. This allows the insn to be recognized
20029 by the push_multi pattern in the arm.md file.
20031 The body of the insn looks something like this:
20033 (parallel [
20034 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20035 (const_int:SI <num>)))
20036 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20037 (use (reg:SI XX))
20038 (use (reg:SI YY))
20042 For the frame note however, we try to be more explicit and actually
20043 show each register being stored into the stack frame, plus a (single)
20044 decrement of the stack pointer. We do it this way in order to be
20045 friendly to the stack unwinding code, which only wants to see a single
20046 stack decrement per instruction. The RTL we generate for the note looks
20047 something like this:
20049 (sequence [
20050 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20051 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20052 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20053 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20057 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20058 instead we'd have a parallel expression detailing all
20059 the stores to the various memory addresses so that debug
20060 information is more up-to-date. Remember however while writing
20061 this to take care of the constraints with the push instruction.
20063 Note also that this has to be taken care of for the VFP registers.
20065 For more see PR43399. */
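/* As a concrete illustration (a sketch under the description above): for
   MASK = DWARF_REGS_MASK = {r4, r5, lr}, num_regs is 3, the PARALLEL
   matches the push_multi pattern and is typically output as

       push    {r4, r5, lr}        @ or "stmfd sp!, {r4, r5, lr}"

   while the attached REG_FRAME_RELATED_EXPR note describes SP being
   decremented by 12 plus the three individual stores at [sp], [sp, #4]
   and [sp, #8].  */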
20067 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20068 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20069 dwarf_par_index = 1;
20071 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20073 if (mask & (1 << i))
20075 reg = gen_rtx_REG (SImode, i);
20077 XVECEXP (par, 0, 0)
20078 = gen_rtx_SET (VOIDmode,
20079 gen_frame_mem
20080 (BLKmode,
20081 gen_rtx_PRE_MODIFY (Pmode,
20082 stack_pointer_rtx,
20083 plus_constant
20084 (Pmode, stack_pointer_rtx,
20085 -4 * num_regs))
20087 gen_rtx_UNSPEC (BLKmode,
20088 gen_rtvec (1, reg),
20089 UNSPEC_PUSH_MULT));
20091 if (dwarf_regs_mask & (1 << i))
20093 tmp = gen_rtx_SET (VOIDmode,
20094 gen_frame_mem (SImode, stack_pointer_rtx),
20095 reg);
20096 RTX_FRAME_RELATED_P (tmp) = 1;
20097 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20100 break;
20104 for (j = 1, i++; j < num_regs; i++)
20106 if (mask & (1 << i))
20108 reg = gen_rtx_REG (SImode, i);
20110 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20112 if (dwarf_regs_mask & (1 << i))
20115 = gen_rtx_SET (VOIDmode,
20116 gen_frame_mem
20117 (SImode,
20118 plus_constant (Pmode, stack_pointer_rtx,
20119 4 * j)),
20120 reg);
20121 RTX_FRAME_RELATED_P (tmp) = 1;
20122 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20125 j++;
20129 par = emit_insn (par);
20131 tmp = gen_rtx_SET (VOIDmode,
20132 stack_pointer_rtx,
20133 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20134 RTX_FRAME_RELATED_P (tmp) = 1;
20135 XVECEXP (dwarf, 0, 0) = tmp;
20137 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20139 return par;
20142 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20143 SIZE is the offset to be adjusted.
20144 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20145 static void
20146 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20148 rtx dwarf;
20150 RTX_FRAME_RELATED_P (insn) = 1;
20151 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20152 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20155 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20156 SAVED_REGS_MASK shows which registers need to be restored.
20158 Unfortunately, since this insn does not reflect very well the actual
20159 semantics of the operation, we need to annotate the insn for the benefit
20160 of DWARF2 frame unwind information. */
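/* Sketch of a typical case (illustrative, not verified output): for
   SAVED_REGS_MASK = {r4, r5, pc}, return_in_pc is true and SP is not in
   the mask, so the PARALLEL holds a return, an SP += 12 update and three
   loads; once matched it is normally printed as

       pop     {r4, r5, pc}

   with REG_CFA_RESTORE notes attached for r4 and r5 only, since PC is not
   described in the DWARF info.  */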
20161 static void
20162 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20164 int num_regs = 0;
20165 int i, j;
20166 rtx par;
20167 rtx dwarf = NULL_RTX;
20168 rtx tmp, reg;
20169 bool return_in_pc;
20170 int offset_adj;
20171 int emit_update;
20173 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20174 offset_adj = return_in_pc ? 1 : 0;
20175 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20176 if (saved_regs_mask & (1 << i))
20177 num_regs++;
20179 gcc_assert (num_regs && num_regs <= 16);
20181 /* If SP is in reglist, then we don't emit SP update insn. */
20182 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20184 /* The parallel needs to hold num_regs SETs
20185 and one SET for the stack update. */
20186 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20188 if (return_in_pc)
20190 tmp = ret_rtx;
20191 XVECEXP (par, 0, 0) = tmp;
20194 if (emit_update)
20196 /* Increment the stack pointer, based on there being
20197 num_regs 4-byte registers to restore. */
20198 tmp = gen_rtx_SET (VOIDmode,
20199 stack_pointer_rtx,
20200 plus_constant (Pmode,
20201 stack_pointer_rtx,
20202 4 * num_regs));
20203 RTX_FRAME_RELATED_P (tmp) = 1;
20204 XVECEXP (par, 0, offset_adj) = tmp;
20207 /* Now restore every reg, which may include PC. */
20208 for (j = 0, i = 0; j < num_regs; i++)
20209 if (saved_regs_mask & (1 << i))
20211 reg = gen_rtx_REG (SImode, i);
20212 if ((num_regs == 1) && emit_update && !return_in_pc)
20214 /* Emit single load with writeback. */
20215 tmp = gen_frame_mem (SImode,
20216 gen_rtx_POST_INC (Pmode,
20217 stack_pointer_rtx));
20218 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20219 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20220 return;
20223 tmp = gen_rtx_SET (VOIDmode,
20224 reg,
20225 gen_frame_mem
20226 (SImode,
20227 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20228 RTX_FRAME_RELATED_P (tmp) = 1;
20229 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20231 /* We need to maintain a sequence for DWARF info too. As dwarf info
20232 should not have PC, skip PC. */
20233 if (i != PC_REGNUM)
20234 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20236 j++;
20239 if (return_in_pc)
20240 par = emit_jump_insn (par);
20241 else
20242 par = emit_insn (par);
20244 REG_NOTES (par) = dwarf;
20245 if (!return_in_pc)
20246 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20247 stack_pointer_rtx, stack_pointer_rtx);
20250 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20251 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20253 Unfortunately, since this insn does not reflect very well the actual
20254 semantics of the operation, we need to annotate the insn for the benefit
20255 of DWARF2 frame unwind information. */
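/* Illustrative sketch (assuming the usual D-register naming): restoring
   four double registers d8-d11 with BASE_REG == SP builds a five-element
   PARALLEL -- SP += 32 followed by four DFmode loads at offsets 0, 8, 16
   and 24 -- which would normally be rendered as something like

       vldmia  sp!, {d8-d11}

   with one REG_CFA_RESTORE note per D register.  */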
20256 static void
20257 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20259 int i, j;
20260 rtx par;
20261 rtx dwarf = NULL_RTX;
20262 rtx tmp, reg;
20264 gcc_assert (num_regs && num_regs <= 32);
20266 /* Workaround ARM10 VFPr1 bug. */
20267 if (num_regs == 2 && !arm_arch6)
20269 if (first_reg == 15)
20270 first_reg--;
20272 num_regs++;
20275 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20276 there could be up to 32 D-registers to restore.
20277 If there are more than 16 D-registers, make two recursive calls,
20278 each of which emits one pop_multi instruction. */
20279 if (num_regs > 16)
20281 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20282 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20283 return;
20286 /* The parallel needs to hold num_regs SETs
20287 and one SET for the stack update. */
20288 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20290 /* Increment the stack pointer, based on there being
20291 num_regs 8-byte registers to restore. */
20292 tmp = gen_rtx_SET (VOIDmode,
20293 base_reg,
20294 plus_constant (Pmode, base_reg, 8 * num_regs));
20295 RTX_FRAME_RELATED_P (tmp) = 1;
20296 XVECEXP (par, 0, 0) = tmp;
20298 /* Now show every reg that will be restored, using a SET for each. */
20299 for (j = 0, i=first_reg; j < num_regs; i += 2)
20301 reg = gen_rtx_REG (DFmode, i);
20303 tmp = gen_rtx_SET (VOIDmode,
20304 reg,
20305 gen_frame_mem
20306 (DFmode,
20307 plus_constant (Pmode, base_reg, 8 * j)));
20308 RTX_FRAME_RELATED_P (tmp) = 1;
20309 XVECEXP (par, 0, j + 1) = tmp;
20311 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20313 j++;
20316 par = emit_insn (par);
20317 REG_NOTES (par) = dwarf;
20319 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20320 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20322 RTX_FRAME_RELATED_P (par) = 1;
20323 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20325 else
20326 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20327 base_reg, base_reg);
20330 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20331 even number of registers is being popped, multiple LDRD patterns are created
20332 for all register pairs. If an odd number of registers is popped, the last
20333 register is loaded using an LDR pattern. */
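/* For example (a sketch, not verified output): with SAVED_REGS_MASK
   covering {r4, r5, r6, lr} and no PC, the code below emits roughly

       ldrd    r4, r5, [sp]
       ldrd    r6, lr, [sp, #8]
       add     sp, sp, #16

   In Thumb-2 the two LDRD destinations need not be consecutive registers,
   which is why r6 can pair with lr here.  */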
20334 static void
20335 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20337 int num_regs = 0;
20338 int i, j;
20339 rtx par = NULL_RTX;
20340 rtx dwarf = NULL_RTX;
20341 rtx tmp, reg, tmp1;
20342 bool return_in_pc;
20344 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20345 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20346 if (saved_regs_mask & (1 << i))
20347 num_regs++;
20349 gcc_assert (num_regs && num_regs <= 16);
20351 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20352 to be popped. If num_regs was even it now becomes odd, and PC is
20353 popped with a multi-register pop. If num_regs was odd it becomes
20354 even, and an LDR combined with the return loads PC. */
20355 if (return_in_pc)
20356 num_regs--;
20358 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20360 /* Var j iterates over all the registers to gather all the registers in
20361 saved_regs_mask. Var i gives index of saved registers in stack frame.
20362 A PARALLEL RTX for a register pair is created here, so that the pattern for
20363 LDRD can be matched. As PC is always last register to be popped, and
20364 we have already decremented num_regs if PC, we don't have to worry
20365 about PC in this loop. */
20366 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20367 if (saved_regs_mask & (1 << j))
20369 /* Create RTX for memory load. */
20370 reg = gen_rtx_REG (SImode, j);
20371 tmp = gen_rtx_SET (SImode,
20372 reg,
20373 gen_frame_mem (SImode,
20374 plus_constant (Pmode,
20375 stack_pointer_rtx, 4 * i)));
20376 RTX_FRAME_RELATED_P (tmp) = 1;
20378 if (i % 2 == 0)
20380 /* When saved-register index (i) is even, the RTX to be emitted is
20381 yet to be created. Hence create it first. The LDRD pattern we
20382 are generating is :
20383 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20384 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20385 where target registers need not be consecutive. */
20386 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20387 dwarf = NULL_RTX;
20390 /* The i-th register is added to the PARALLEL RTX. If i is even, reg_i is
20391 added as the 0th element; if i is odd, reg_i is added as the 1st element
20392 of the LDRD pattern shown above. */
20393 XVECEXP (par, 0, (i % 2)) = tmp;
20394 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20396 if ((i % 2) == 1)
20398 /* When saved-register index (i) is odd, RTXs for both the registers
20399 to be loaded are generated in above given LDRD pattern, and the
20400 pattern can be emitted now. */
20401 par = emit_insn (par);
20402 REG_NOTES (par) = dwarf;
20403 RTX_FRAME_RELATED_P (par) = 1;
20406 i++;
20409 /* If the number of registers popped is odd and return_in_pc is false, or
20410 the number of registers is even and return_in_pc is true, the last register
20411 is popped using a single LDR; it can be PC as well. Hence, adjust the stack
20412 first and then use LDR with post-increment. */
20414 /* Increment the stack pointer, based on there being
20415 num_regs 4-byte registers to restore. */
20416 tmp = gen_rtx_SET (VOIDmode,
20417 stack_pointer_rtx,
20418 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20419 RTX_FRAME_RELATED_P (tmp) = 1;
20420 tmp = emit_insn (tmp);
20421 if (!return_in_pc)
20423 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20424 stack_pointer_rtx, stack_pointer_rtx);
20427 dwarf = NULL_RTX;
20429 if (((num_regs % 2) == 1 && !return_in_pc)
20430 || ((num_regs % 2) == 0 && return_in_pc))
20432 /* Scan for the single register to be popped. Skip until the saved
20433 register is found. */
20434 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20436 /* Gen LDR with post increment here. */
20437 tmp1 = gen_rtx_MEM (SImode,
20438 gen_rtx_POST_INC (SImode,
20439 stack_pointer_rtx));
20440 set_mem_alias_set (tmp1, get_frame_alias_set ());
20442 reg = gen_rtx_REG (SImode, j);
20443 tmp = gen_rtx_SET (SImode, reg, tmp1);
20444 RTX_FRAME_RELATED_P (tmp) = 1;
20445 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20447 if (return_in_pc)
20449 /* If return_in_pc, j must be PC_REGNUM. */
20450 gcc_assert (j == PC_REGNUM);
20451 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20452 XVECEXP (par, 0, 0) = ret_rtx;
20453 XVECEXP (par, 0, 1) = tmp;
20454 par = emit_jump_insn (par);
20456 else
20458 par = emit_insn (tmp);
20459 REG_NOTES (par) = dwarf;
20460 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20461 stack_pointer_rtx, stack_pointer_rtx);
20465 else if ((num_regs % 2) == 1 && return_in_pc)
20467 /* There are 2 registers to be popped. So, generate the pattern
20468 pop_multiple_with_stack_update_and_return to pop in PC. */
20469 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20472 return;
20475 /* LDRD in ARM mode needs consecutive registers as operands. This function
20476 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20477 offset addressing and then generates one separate stack update. This provides
20478 more scheduling freedom, compared to writeback on every load. However,
20479 if the function returns using load into PC directly
20480 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20481 before the last load. TODO: Add a peephole optimization to recognize
20482 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20483 peephole optimization to merge the load at stack-offset zero
20484 with the stack update instruction using load with writeback
20485 in post-index addressing mode. */
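/* Illustrative sketch (assumed output): for SAVED_REGS_MASK covering
   {r4, r5, r6, pc} in ARM mode the expected sequence is roughly

       ldrd    r4, r5, [sp]        @ consecutive even/odd pair
       ldr     r6, [sp, #8]        @ no partner for r6 in the mask
       add     sp, sp, #12
       ldr     pc, [sp], #4        @ final load doubles as the return

   with the PC load emitted as a PARALLEL of a return and a post-increment
   load, as shown at the end of the function below.  */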
20486 static void
20487 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20489 int j = 0;
20490 int offset = 0;
20491 rtx par = NULL_RTX;
20492 rtx dwarf = NULL_RTX;
20493 rtx tmp, mem;
20495 /* Restore saved registers. */
20496 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20497 j = 0;
20498 while (j <= LAST_ARM_REGNUM)
20499 if (saved_regs_mask & (1 << j))
20501 if ((j % 2) == 0
20502 && (saved_regs_mask & (1 << (j + 1)))
20503 && (j + 1) != PC_REGNUM)
20505 /* Current register and next register form register pair for which
20506 LDRD can be generated. PC is always the last register popped, and
20507 we handle it separately. */
20508 if (offset > 0)
20509 mem = gen_frame_mem (DImode,
20510 plus_constant (Pmode,
20511 stack_pointer_rtx,
20512 offset));
20513 else
20514 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20516 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20517 tmp = emit_insn (tmp);
20518 RTX_FRAME_RELATED_P (tmp) = 1;
20520 /* Generate dwarf info. */
20522 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20523 gen_rtx_REG (SImode, j),
20524 NULL_RTX);
20525 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20526 gen_rtx_REG (SImode, j + 1),
20527 dwarf);
20529 REG_NOTES (tmp) = dwarf;
20531 offset += 8;
20532 j += 2;
20534 else if (j != PC_REGNUM)
20536 /* Emit a single word load. */
20537 if (offset > 0)
20538 mem = gen_frame_mem (SImode,
20539 plus_constant (Pmode,
20540 stack_pointer_rtx,
20541 offset));
20542 else
20543 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20545 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20546 tmp = emit_insn (tmp);
20547 RTX_FRAME_RELATED_P (tmp) = 1;
20549 /* Generate dwarf info. */
20550 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20551 gen_rtx_REG (SImode, j),
20552 NULL_RTX);
20554 offset += 4;
20555 j += 1;
20557 else /* j == PC_REGNUM */
20558 j++;
20560 else
20561 j++;
20563 /* Update the stack. */
20564 if (offset > 0)
20566 tmp = gen_rtx_SET (Pmode,
20567 stack_pointer_rtx,
20568 plus_constant (Pmode,
20569 stack_pointer_rtx,
20570 offset));
20571 tmp = emit_insn (tmp);
20572 arm_add_cfa_adjust_cfa_note (tmp, offset,
20573 stack_pointer_rtx, stack_pointer_rtx);
20574 offset = 0;
20577 if (saved_regs_mask & (1 << PC_REGNUM))
20579 /* Only PC is to be popped. */
20580 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20581 XVECEXP (par, 0, 0) = ret_rtx;
20582 tmp = gen_rtx_SET (SImode,
20583 gen_rtx_REG (SImode, PC_REGNUM),
20584 gen_frame_mem (SImode,
20585 gen_rtx_POST_INC (SImode,
20586 stack_pointer_rtx)));
20587 RTX_FRAME_RELATED_P (tmp) = 1;
20588 XVECEXP (par, 0, 1) = tmp;
20589 par = emit_jump_insn (par);
20591 /* Generate dwarf info. */
20592 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20593 gen_rtx_REG (SImode, PC_REGNUM),
20594 NULL_RTX);
20595 REG_NOTES (par) = dwarf;
20596 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20597 stack_pointer_rtx, stack_pointer_rtx);
20601 /* Calculate the size of the return value that is passed in registers. */
20602 static unsigned
20603 arm_size_return_regs (void)
20605 machine_mode mode;
20607 if (crtl->return_rtx != 0)
20608 mode = GET_MODE (crtl->return_rtx);
20609 else
20610 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20612 return GET_MODE_SIZE (mode);
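/* Illustration: a function returning a 64-bit integer in r0/r1 has a
   DImode return rtx, so this reports 8 bytes.  The "<= 12" test in
   arm_get_frame_offsets below then still permits r3 to be used for
   alignment padding, whereas a return value of more than 12 bytes would
   rule r3 out.  */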
20615 /* Return true if the current function needs to save/restore LR. */
20616 static bool
20617 thumb_force_lr_save (void)
20619 return !cfun->machine->lr_save_eliminated
20620 && (!leaf_function_p ()
20621 || thumb_far_jump_used_p ()
20622 || df_regs_ever_live_p (LR_REGNUM));
20625 /* We do not know whether r3 will be available, because
20626 an indirect tail call is happening in this
20627 particular case. */
20628 static bool
20629 is_indirect_tailcall_p (rtx call)
20631 rtx pat = PATTERN (call);
20633 /* Indirect tail call. */
20634 pat = XVECEXP (pat, 0, 0);
20635 if (GET_CODE (pat) == SET)
20636 pat = SET_SRC (pat);
20638 pat = XEXP (XEXP (pat, 0), 0);
20639 return REG_P (pat);
20642 /* Return true if r3 is used by any of the tail call insns in the
20643 current function. */
20644 static bool
20645 any_sibcall_could_use_r3 (void)
20647 edge_iterator ei;
20648 edge e;
20650 if (!crtl->tail_call_emit)
20651 return false;
20652 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20653 if (e->flags & EDGE_SIBCALL)
20655 rtx call = BB_END (e->src);
20656 if (!CALL_P (call))
20657 call = prev_nonnote_nondebug_insn (call);
20658 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20659 if (find_regno_fusage (call, USE, 3)
20660 || is_indirect_tailcall_p (call))
20661 return true;
20663 return false;
20667 /* Compute the distance from register FROM to register TO.
20668 These can be the arg pointer (26), the soft frame pointer (25),
20669 the stack pointer (13) or the hard frame pointer (11).
20670 In Thumb mode r7 is used as the hard frame pointer, if needed.
20671 Typical stack layout looks like this:
20673 old stack pointer -> | |
20674 ----
20675 | | \
20676 | | saved arguments for
20677 | | vararg functions
20678 | | /
20680 hard FP & arg pointer -> | | \
20681 | | stack
20682 | | frame
20683 | | /
20685 | | \
20686 | | call saved
20687 | | registers
20688 soft frame pointer -> | | /
20690 | | \
20691 | | local
20692 | | variables
20693 locals base pointer -> | | /
20695 | | \
20696 | | outgoing
20697 | | arguments
20698 current stack pointer -> | | /
20701 For a given function some or all of these stack components
20702 may not be needed, giving rise to the possibility of
20703 eliminating some of the registers.
20705 The values returned by this function must reflect the behavior
20706 of arm_expand_prologue() and arm_compute_save_reg_mask().
20708 The sign of the number returned reflects the direction of stack
20709 growth, so the values are positive for all eliminations except
20710 from the soft frame pointer to the hard frame pointer.
20712 SFP may point just inside the local variables block to ensure correct
20713 alignment. */
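/* A rough worked example (assuming CALLER_INTERWORKING_SLOT_SIZE is 0,
   no static chain and no frame pointer): a non-leaf ARM function that
   saves {r4, r5, r6, lr}, with no pretend args, no locals and no outgoing
   arguments, gets

       saved_args    = 0
       saved_regs    = 16
       soft_frame    = 16      (already 8-byte aligned)
       locals_base   = 16
       outgoing_args = 16

   so eliminating ARG_POINTER into STACK_POINTER yields 16 - (0 + 4) = 12,
   matching the layout sketched above.  */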
20716 /* Calculate stack offsets. These are used to calculate register elimination
20717 offsets and in prologue/epilogue code. Also calculates which registers
20718 should be saved. */
20720 static arm_stack_offsets *
20721 arm_get_frame_offsets (void)
20723 struct arm_stack_offsets *offsets;
20724 unsigned long func_type;
20725 int leaf;
20726 int saved;
20727 int core_saved;
20728 HOST_WIDE_INT frame_size;
20729 int i;
20731 offsets = &cfun->machine->stack_offsets;
20733 /* We need to know if we are a leaf function. Unfortunately, it
20734 is possible to be called after start_sequence has been called,
20735 which causes get_insns to return the insns for the sequence,
20736 not the function, which will cause leaf_function_p to return
20737 the incorrect result.
20739 However, we only need to know about leaf functions once reload has completed, and the
20740 frame size cannot be changed after that time, so we can safely
20741 use the cached value. */
20743 if (reload_completed)
20744 return offsets;
20746 /* Initially this is the size of the local variables. It will be translated
20747 into an offset once we have determined the size of preceding data. */
20748 frame_size = ROUND_UP_WORD (get_frame_size ());
20750 leaf = leaf_function_p ();
20752 /* Space for variadic functions. */
20753 offsets->saved_args = crtl->args.pretend_args_size;
20755 /* In Thumb mode this is incorrect, but never used. */
20756 offsets->frame
20757 = (offsets->saved_args
20758 + arm_compute_static_chain_stack_bytes ()
20759 + (frame_pointer_needed ? 4 : 0));
20761 if (TARGET_32BIT)
20763 unsigned int regno;
20765 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20766 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20767 saved = core_saved;
20769 /* We know that SP will be doubleword aligned on entry, and we must
20770 preserve that condition at any subroutine call. We also require the
20771 soft frame pointer to be doubleword aligned. */
20773 if (TARGET_REALLY_IWMMXT)
20775 /* Check for the call-saved iWMMXt registers. */
20776 for (regno = FIRST_IWMMXT_REGNUM;
20777 regno <= LAST_IWMMXT_REGNUM;
20778 regno++)
20779 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20780 saved += 8;
20783 func_type = arm_current_func_type ();
20784 /* Space for saved VFP registers. */
20785 if (! IS_VOLATILE (func_type)
20786 && TARGET_HARD_FLOAT && TARGET_VFP)
20787 saved += arm_get_vfp_saved_size ();
20789 else /* TARGET_THUMB1 */
20791 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20792 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20793 saved = core_saved;
20794 if (TARGET_BACKTRACE)
20795 saved += 16;
20798 /* Saved registers include the stack frame. */
20799 offsets->saved_regs
20800 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20801 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20803 /* A leaf function does not need any stack alignment if it has nothing
20804 on the stack. */
20805 if (leaf && frame_size == 0
20806 /* However if it calls alloca(), we have a dynamically allocated
20807 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20808 && ! cfun->calls_alloca)
20810 offsets->outgoing_args = offsets->soft_frame;
20811 offsets->locals_base = offsets->soft_frame;
20812 return offsets;
20815 /* Ensure SFP has the correct alignment. */
20816 if (ARM_DOUBLEWORD_ALIGN
20817 && (offsets->soft_frame & 7))
20819 offsets->soft_frame += 4;
20820 /* Try to align stack by pushing an extra reg. Don't bother doing this
20821 when there is a stack frame as the alignment will be rolled into
20822 the normal stack adjustment. */
20823 if (frame_size + crtl->outgoing_args_size == 0)
20825 int reg = -1;
20827 /* Register r3 is caller-saved. Normally it does not need to be
20828 saved on entry by the prologue. However if we choose to save
20829 it for padding then we may confuse the compiler into thinking
20830 a prologue sequence is required when in fact it is not. This
20831 will occur when shrink-wrapping if r3 is used as a scratch
20832 register and there are no other callee-saved writes.
20834 This situation can be avoided when other callee-saved registers
20835 are available and r3 is not mandatory if we choose a callee-saved
20836 register for padding. */
20837 bool prefer_callee_reg_p = false;
20839 /* If it is safe to use r3, then do so. This sometimes
20840 generates better code on Thumb-2 by avoiding the need to
20841 use 32-bit push/pop instructions. */
20842 if (! any_sibcall_could_use_r3 ()
20843 && arm_size_return_regs () <= 12
20844 && (offsets->saved_regs_mask & (1 << 3)) == 0
20845 && (TARGET_THUMB2
20846 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20848 reg = 3;
20849 if (!TARGET_THUMB2)
20850 prefer_callee_reg_p = true;
20852 if (reg == -1
20853 || prefer_callee_reg_p)
20855 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20857 /* Avoid fixed registers; they may be changed at
20858 arbitrary times so it's unsafe to restore them
20859 during the epilogue. */
20860 if (!fixed_regs[i]
20861 && (offsets->saved_regs_mask & (1 << i)) == 0)
20863 reg = i;
20864 break;
20869 if (reg != -1)
20871 offsets->saved_regs += 4;
20872 offsets->saved_regs_mask |= (1 << reg);
20877 offsets->locals_base = offsets->soft_frame + frame_size;
20878 offsets->outgoing_args = (offsets->locals_base
20879 + crtl->outgoing_args_size);
20881 if (ARM_DOUBLEWORD_ALIGN)
20883 /* Ensure SP remains doubleword aligned. */
20884 if (offsets->outgoing_args & 7)
20885 offsets->outgoing_args += 4;
20886 gcc_assert (!(offsets->outgoing_args & 7));
20889 return offsets;
20893 /* Calculate the relative offsets for the different stack pointers. Positive
20894 offsets are in the direction of stack growth. */
20896 HOST_WIDE_INT
20897 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20899 arm_stack_offsets *offsets;
20901 offsets = arm_get_frame_offsets ();
20903 /* OK, now we have enough information to compute the distances.
20904 There must be an entry in these switch tables for each pair
20905 of registers in ELIMINABLE_REGS, even if some of the entries
20906 seem to be redundant or useless. */
20907 switch (from)
20909 case ARG_POINTER_REGNUM:
20910 switch (to)
20912 case THUMB_HARD_FRAME_POINTER_REGNUM:
20913 return 0;
20915 case FRAME_POINTER_REGNUM:
20916 /* This is the reverse of the soft frame pointer
20917 to hard frame pointer elimination below. */
20918 return offsets->soft_frame - offsets->saved_args;
20920 case ARM_HARD_FRAME_POINTER_REGNUM:
20921 /* This is only non-zero in the case where the static chain register
20922 is stored above the frame. */
20923 return offsets->frame - offsets->saved_args - 4;
20925 case STACK_POINTER_REGNUM:
20926 /* If nothing has been pushed on the stack at all
20927 then this will return -4. This *is* correct! */
20928 return offsets->outgoing_args - (offsets->saved_args + 4);
20930 default:
20931 gcc_unreachable ();
20933 gcc_unreachable ();
20935 case FRAME_POINTER_REGNUM:
20936 switch (to)
20938 case THUMB_HARD_FRAME_POINTER_REGNUM:
20939 return 0;
20941 case ARM_HARD_FRAME_POINTER_REGNUM:
20942 /* The hard frame pointer points to the top entry in the
20943 stack frame. The soft frame pointer to the bottom entry
20944 in the stack frame. If there is no stack frame at all,
20945 then they are identical. */
20947 return offsets->frame - offsets->soft_frame;
20949 case STACK_POINTER_REGNUM:
20950 return offsets->outgoing_args - offsets->soft_frame;
20952 default:
20953 gcc_unreachable ();
20955 gcc_unreachable ();
20957 default:
20958 /* You cannot eliminate from the stack pointer.
20959 In theory you could eliminate from the hard frame
20960 pointer to the stack pointer, but this will never
20961 happen, since if a stack frame is not needed the
20962 hard frame pointer will never be used. */
20963 gcc_unreachable ();
20967 /* Given FROM and TO register numbers, say whether this elimination is
20968 allowed. Frame pointer elimination is automatically handled.
20970 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20971 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20972 pointer, we must eliminate FRAME_POINTER_REGNUM into
20973 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20974 ARG_POINTER_REGNUM. */
20976 bool
20977 arm_can_eliminate (const int from, const int to)
20979 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20980 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20981 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20982 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20983 true);
20986 /* Emit RTL to save coprocessor registers on function entry. Returns the
20987 number of bytes pushed. */
20989 static int
20990 arm_save_coproc_regs(void)
20992 int saved_size = 0;
20993 unsigned reg;
20994 unsigned start_reg;
20995 rtx insn;
20997 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20998 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21000 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21001 insn = gen_rtx_MEM (V2SImode, insn);
21002 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21003 RTX_FRAME_RELATED_P (insn) = 1;
21004 saved_size += 8;
21007 if (TARGET_HARD_FLOAT && TARGET_VFP)
21009 start_reg = FIRST_VFP_REGNUM;
21011 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21013 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21014 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21016 if (start_reg != reg)
21017 saved_size += vfp_emit_fstmd (start_reg,
21018 (reg - start_reg) / 2);
21019 start_reg = reg + 2;
21022 if (start_reg != reg)
21023 saved_size += vfp_emit_fstmd (start_reg,
21024 (reg - start_reg) / 2);
21026 return saved_size;
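/* Sketch of the common case (illustrative): if only d8 and d9 are live
   and call-saved, the scan finds one contiguous run and a single
   vfp_emit_fstmd call stores both, adding 16 bytes to saved_size --
   conceptually a "vpush {d8, d9}".  Each live iWMMXt register is instead
   stored individually with a pre-decrement of SP, 8 bytes at a time.  */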
21030 /* Set the Thumb frame pointer from the stack pointer. */
21032 static void
21033 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21035 HOST_WIDE_INT amount;
21036 rtx insn, dwarf;
21038 amount = offsets->outgoing_args - offsets->locals_base;
21039 if (amount < 1024)
21040 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21041 stack_pointer_rtx, GEN_INT (amount)));
21042 else
21044 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21045 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21046 expects the first two operands to be the same. */
21047 if (TARGET_THUMB2)
21049 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21050 stack_pointer_rtx,
21051 hard_frame_pointer_rtx));
21053 else
21055 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21056 hard_frame_pointer_rtx,
21057 stack_pointer_rtx));
21059 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21060 plus_constant (Pmode, stack_pointer_rtx, amount));
21061 RTX_FRAME_RELATED_P (dwarf) = 1;
21062 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21065 RTX_FRAME_RELATED_P (insn) = 1;
21068 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21069 function. */
21070 void
21071 arm_expand_prologue (void)
21073 rtx amount;
21074 rtx insn;
21075 rtx ip_rtx;
21076 unsigned long live_regs_mask;
21077 unsigned long func_type;
21078 int fp_offset = 0;
21079 int saved_pretend_args = 0;
21080 int saved_regs = 0;
21081 unsigned HOST_WIDE_INT args_to_push;
21082 arm_stack_offsets *offsets;
21084 func_type = arm_current_func_type ();
21086 /* Naked functions don't have prologues. */
21087 if (IS_NAKED (func_type))
21088 return;
21090 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21091 args_to_push = crtl->args.pretend_args_size;
21093 /* Compute which register we will have to save onto the stack. */
21094 offsets = arm_get_frame_offsets ();
21095 live_regs_mask = offsets->saved_regs_mask;
21097 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21099 if (IS_STACKALIGN (func_type))
21101 rtx r0, r1;
21103 /* Handle a word-aligned stack pointer. We generate the following:
21105 mov r0, sp
21106 bic r1, r0, #7
21107 mov sp, r1
21108 <save and restore r0 in normal prologue/epilogue>
21109 mov sp, r0
21110 bx lr
21112 The unwinder doesn't need to know about the stack realignment.
21113 Just tell it we saved SP in r0. */
21114 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21116 r0 = gen_rtx_REG (SImode, 0);
21117 r1 = gen_rtx_REG (SImode, 1);
21119 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21120 RTX_FRAME_RELATED_P (insn) = 1;
21121 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21123 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21125 /* ??? The CFA changes here, which may cause GDB to conclude that it
21126 has entered a different function. That said, the unwind info is
21127 correct, individually, before and after this instruction because
21128 we've described the save of SP, which will override the default
21129 handling of SP as restoring from the CFA. */
21130 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21133 /* For APCS frames, if IP register is clobbered
21134 when creating frame, save that register in a special
21135 way. */
21136 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21138 if (IS_INTERRUPT (func_type))
21140 /* Interrupt functions must not corrupt any registers.
21141 Creating a frame pointer however, corrupts the IP
21142 register, so we must push it first. */
21143 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21145 /* Do not set RTX_FRAME_RELATED_P on this insn.
21146 The dwarf stack unwinding code only wants to see one
21147 stack decrement per function, and this is not it. If
21148 this instruction is labeled as being part of the frame
21149 creation sequence then dwarf2out_frame_debug_expr will
21150 die when it encounters the assignment of IP to FP
21151 later on, since the use of SP here establishes SP as
21152 the CFA register and not IP.
21154 Anyway this instruction is not really part of the stack
21155 frame creation although it is part of the prologue. */
21157 else if (IS_NESTED (func_type))
21159 /* The static chain register is the same as the IP register
21160 used as a scratch register during stack frame creation.
21161 To get around this we need to find somewhere to store IP
21162 whilst the frame is being created. We try the following
21163 places in order:
21165 1. The last argument register r3 if it is available.
21166 2. A slot on the stack above the frame if there are no
21167 arguments to push onto the stack.
21168 3. Register r3 again, after pushing the argument registers
21169 onto the stack, if this is a varargs function.
21170 4. The last slot on the stack created for the arguments to
21171 push, if this isn't a varargs function.
21173 Note - we only need to tell the dwarf2 backend about the SP
21174 adjustment in the second variant; the static chain register
21175 doesn't need to be unwound, as it doesn't contain a value
21176 inherited from the caller. */
21178 if (!arm_r3_live_at_start_p ())
21179 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21180 else if (args_to_push == 0)
21182 rtx addr, dwarf;
21184 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21185 saved_regs += 4;
21187 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21188 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21189 fp_offset = 4;
21191 /* Just tell the dwarf backend that we adjusted SP. */
21192 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21193 plus_constant (Pmode, stack_pointer_rtx,
21194 -fp_offset));
21195 RTX_FRAME_RELATED_P (insn) = 1;
21196 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21198 else
21200 /* Store the args on the stack. */
21201 if (cfun->machine->uses_anonymous_args)
21203 insn
21204 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21205 (0xf0 >> (args_to_push / 4)) & 0xf);
21206 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21207 saved_pretend_args = 1;
21209 else
21211 rtx addr, dwarf;
21213 if (args_to_push == 4)
21214 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21215 else
21216 addr
21217 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21218 plus_constant (Pmode,
21219 stack_pointer_rtx,
21220 -args_to_push));
21222 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21224 /* Just tell the dwarf backend that we adjusted SP. */
21225 dwarf
21226 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21227 plus_constant (Pmode, stack_pointer_rtx,
21228 -args_to_push));
21229 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21232 RTX_FRAME_RELATED_P (insn) = 1;
21233 fp_offset = args_to_push;
21234 args_to_push = 0;
21238 insn = emit_set_insn (ip_rtx,
21239 plus_constant (Pmode, stack_pointer_rtx,
21240 fp_offset));
21241 RTX_FRAME_RELATED_P (insn) = 1;
21244 if (args_to_push)
21246 /* Push the argument registers, or reserve space for them. */
21247 if (cfun->machine->uses_anonymous_args)
21248 insn = emit_multi_reg_push
21249 ((0xf0 >> (args_to_push / 4)) & 0xf,
21250 (0xf0 >> (args_to_push / 4)) & 0xf);
21251 else
21252 insn = emit_insn
21253 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21254 GEN_INT (- args_to_push)));
21255 RTX_FRAME_RELATED_P (insn) = 1;
21258 /* If this is an interrupt service routine, and the link register
21259 is going to be pushed, and we're not generating the extra
21260 push of IP (needed when a frame is needed and the frame layout is APCS),
21261 then subtracting four from LR now means that the function return
21262 can be done with a single instruction. */
21263 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21264 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21265 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21266 && TARGET_ARM)
21268 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21270 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21273 if (live_regs_mask)
21275 unsigned long dwarf_regs_mask = live_regs_mask;
21277 saved_regs += bit_count (live_regs_mask) * 4;
21278 if (optimize_size && !frame_pointer_needed
21279 && saved_regs == offsets->saved_regs - offsets->saved_args)
21281 /* If no coprocessor registers are being pushed and we don't have
21282 to worry about a frame pointer then push extra registers to
21283 create the stack frame. This is done in a way that does not
21284 alter the frame layout, so is independent of the epilogue. */
21285 int n;
21286 int frame;
21287 n = 0;
21288 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21289 n++;
21290 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21291 if (frame && n * 4 >= frame)
21293 n = frame / 4;
21294 live_regs_mask |= (1 << n) - 1;
21295 saved_regs += frame;
21299 if (TARGET_LDRD
21300 && current_tune->prefer_ldrd_strd
21301 && !optimize_function_for_size_p (cfun))
21303 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21304 if (TARGET_THUMB2)
21305 thumb2_emit_strd_push (live_regs_mask);
21306 else if (TARGET_ARM
21307 && !TARGET_APCS_FRAME
21308 && !IS_INTERRUPT (func_type))
21309 arm_emit_strd_push (live_regs_mask);
21310 else
21312 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21313 RTX_FRAME_RELATED_P (insn) = 1;
21316 else
21318 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21319 RTX_FRAME_RELATED_P (insn) = 1;
21323 if (! IS_VOLATILE (func_type))
21324 saved_regs += arm_save_coproc_regs ();
21326 if (frame_pointer_needed && TARGET_ARM)
21328 /* Create the new frame pointer. */
21329 if (TARGET_APCS_FRAME)
21331 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21332 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21333 RTX_FRAME_RELATED_P (insn) = 1;
21335 if (IS_NESTED (func_type))
21337 /* Recover the static chain register. */
21338 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21339 insn = gen_rtx_REG (SImode, 3);
21340 else
21342 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21343 insn = gen_frame_mem (SImode, insn);
21345 emit_set_insn (ip_rtx, insn);
21346 /* Add a USE to stop propagate_one_insn() from barfing. */
21347 emit_insn (gen_force_register_use (ip_rtx));
21350 else
21352 insn = GEN_INT (saved_regs - 4);
21353 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21354 stack_pointer_rtx, insn));
21355 RTX_FRAME_RELATED_P (insn) = 1;
21359 if (flag_stack_usage_info)
21360 current_function_static_stack_size
21361 = offsets->outgoing_args - offsets->saved_args;
21363 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21365 /* This add can produce multiple insns for a large constant, so we
21366 need to get tricky. */
21367 rtx_insn *last = get_last_insn ();
21369 amount = GEN_INT (offsets->saved_args + saved_regs
21370 - offsets->outgoing_args);
21372 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21373 amount));
21376 last = last ? NEXT_INSN (last) : get_insns ();
21377 RTX_FRAME_RELATED_P (last) = 1;
21379 while (last != insn);
21381 /* If the frame pointer is needed, emit a special barrier that
21382 will prevent the scheduler from moving stores to the frame
21383 before the stack adjustment. */
21384 if (frame_pointer_needed)
21385 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21386 hard_frame_pointer_rtx));
21390 if (frame_pointer_needed && TARGET_THUMB2)
21391 thumb_set_frame_pointer (offsets);
21393 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21395 unsigned long mask;
21397 mask = live_regs_mask;
21398 mask &= THUMB2_WORK_REGS;
21399 if (!IS_NESTED (func_type))
21400 mask |= (1 << IP_REGNUM);
21401 arm_load_pic_register (mask);
21404 /* If we are profiling, make sure no instructions are scheduled before
21405 the call to mcount. Similarly if the user has requested no
21406 scheduling in the prolog. Similarly if we want non-call exceptions
21407 using the EABI unwinder, to prevent faulting instructions from being
21408 swapped with a stack adjustment. */
21409 if (crtl->profile || !TARGET_SCHED_PROLOG
21410 || (arm_except_unwind_info (&global_options) == UI_TARGET
21411 && cfun->can_throw_non_call_exceptions))
21412 emit_insn (gen_blockage ());
21414 /* If the link register is being kept alive, with the return address in it,
21415 then make sure that it does not get reused by the ce2 pass. */
21416 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21417 cfun->machine->lr_save_eliminated = 1;
21420 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21421 static void
21422 arm_print_condition (FILE *stream)
21424 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21426 /* Branch conversion is not implemented for Thumb-2. */
21427 if (TARGET_THUMB)
21429 output_operand_lossage ("predicated Thumb instruction");
21430 return;
21432 if (current_insn_predicate != NULL)
21434 output_operand_lossage
21435 ("predicated instruction in conditional sequence");
21436 return;
21439 fputs (arm_condition_codes[arm_current_cc], stream);
21441 else if (current_insn_predicate)
21443 enum arm_cond_code code;
21445 if (TARGET_THUMB1)
21447 output_operand_lossage ("predicated Thumb instruction");
21448 return;
21451 code = get_arm_condition_code (current_insn_predicate);
21452 fputs (arm_condition_codes[code], stream);
21457 /* Globally reserved letters: acln
21458 Punctuation letters currently used: @_|?().!#
21459 Lower case letters currently used: bcdefhimpqtvwxyz
21460 Upper case letters currently used: ABCDFGHIJKLMNOPQRSTU
21461 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21463 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21465 If CODE is 'd', then the X is a condition operand and the instruction
21466 should only be executed if the condition is true.
21467 If CODE is 'D', then the X is a condition operand and the instruction
21468 should only be executed if the condition is false: however, if the mode
21469 of the comparison is CCFPEmode, then always execute the instruction -- we
21470 do this because in these circumstances !GE does not necessarily imply LT;
21471 in these cases the instruction pattern will take care to make sure that
21472 an instruction containing %d will follow, thereby undoing the effects of
21473 doing this instruction unconditionally.
21474 If CODE is 'N' then X is a floating point operand that must be negated
21475 before output.
21476 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21477 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
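/* Illustrative examples (not from the original sources), assuming default
   ARM register names: for a CONST_INT operand of 10, "%B0" in an output
   template prints -11 (the bitwise inverse), and "%M0" applied to r4
   holding a DImode value prints "{r4-r5}".  */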
21478 static void
21479 arm_print_operand (FILE *stream, rtx x, int code)
21481 switch (code)
21483 case '@':
21484 fputs (ASM_COMMENT_START, stream);
21485 return;
21487 case '_':
21488 fputs (user_label_prefix, stream);
21489 return;
21491 case '|':
21492 fputs (REGISTER_PREFIX, stream);
21493 return;
21495 case '?':
21496 arm_print_condition (stream);
21497 return;
21499 case '(':
21500 /* Nothing in unified syntax, otherwise the current condition code. */
21501 if (!TARGET_UNIFIED_ASM)
21502 arm_print_condition (stream);
21503 break;
21505 case ')':
21506 /* The current condition code in unified syntax, otherwise nothing. */
21507 if (TARGET_UNIFIED_ASM)
21508 arm_print_condition (stream);
21509 break;
21511 case '.':
21512 /* The current condition code for a condition code setting instruction.
21513 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21514 if (TARGET_UNIFIED_ASM)
21516 fputc ('s', stream);
21517 arm_print_condition (stream);
21519 else
21521 arm_print_condition (stream);
21522 fputc ('s', stream);
21524 return;
21526 case '!':
21527 /* If the instruction is conditionally executed then print
21528 the current condition code, otherwise print 's'. */
21529 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21530 if (current_insn_predicate)
21531 arm_print_condition (stream);
21532 else
21533 fputc ('s', stream);
21534 break;
21536 /* %# is a "break" sequence. It doesn't output anything, but is used to
21537 separate e.g. operand numbers from following text, if that text consists
21538 of further digits which we don't want to be part of the operand
21539 number. */
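/* Illustrative example (hypothetical template): in "foo%0%#1" the "%#"
   lets the literal digit 1 follow operand 0 without being parsed as part
   of the operand number.  */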
21540 case '#':
21541 return;
21543 case 'N':
21545 REAL_VALUE_TYPE r;
21546 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21547 r = real_value_negate (&r);
21548 fprintf (stream, "%s", fp_const_from_val (&r));
21550 return;
21552 /* An integer or symbol address without a preceding # sign. */
21553 case 'c':
21554 switch (GET_CODE (x))
21556 case CONST_INT:
21557 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21558 break;
21560 case SYMBOL_REF:
21561 output_addr_const (stream, x);
21562 break;
21564 case CONST:
21565 if (GET_CODE (XEXP (x, 0)) == PLUS
21566 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21568 output_addr_const (stream, x);
21569 break;
21571 /* Fall through. */
21573 default:
21574 output_operand_lossage ("unsupported operand for code '%c'", code);
21576 return;
21578 /* An integer that we want to print in HEX. */
21579 case 'x':
21580 switch (GET_CODE (x))
21582 case CONST_INT:
21583 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21584 break;
21586 default:
21587 output_operand_lossage ("unsupported operand for code '%c'", code);
21589 return;
21591 case 'B':
21592 if (CONST_INT_P (x))
21594 HOST_WIDE_INT val;
21595 val = ARM_SIGN_EXTEND (~INTVAL (x));
21596 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21598 else
21600 putc ('~', stream);
21601 output_addr_const (stream, x);
21603 return;
21605 case 'b':
21606 /* Print the log2 of a CONST_INT. */
21608 HOST_WIDE_INT val;
21610 if (!CONST_INT_P (x)
21611 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21612 output_operand_lossage ("unsupported operand for code '%c'", code);
21613 else
21614 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21616 return;
21618 case 'L':
21619 /* The low 16 bits of an immediate constant. */
21620 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21621 return;
21623 case 'i':
21624 fprintf (stream, "%s", arithmetic_instr (x, 1));
21625 return;
21627 case 'I':
21628 fprintf (stream, "%s", arithmetic_instr (x, 0));
21629 return;
21631 case 'S':
21633 HOST_WIDE_INT val;
21634 const char *shift;
21636 shift = shift_op (x, &val);
21638 if (shift)
21640 fprintf (stream, ", %s ", shift);
21641 if (val == -1)
21642 arm_print_operand (stream, XEXP (x, 1), 0);
21643 else
21644 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21647 return;
21649 /* An explanation of the 'Q', 'R' and 'H' register operands:
21651 In a pair of registers containing a DI or DF value the 'Q'
21652 operand returns the register number of the register containing
21653 the least significant part of the value. The 'R' operand returns
21654 the register number of the register containing the most
21655 significant part of the value.
21657 The 'H' operand returns the higher of the two register numbers.
21658 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21659 same as the 'Q' operand, since the most significant part of the
21660 value is held in the lower number register. The reverse is true
21661 on systems where WORDS_BIG_ENDIAN is false.
21663 The purpose of these operands is to distinguish between cases
21664 where the endian-ness of the values is important (for example
21665 when they are added together), and cases where the endian-ness
21666 is irrelevant, but the order of register operations is important.
21667 For example when loading a value from memory into a register
21668 pair, the endian-ness does not matter. Provided that the value
21669 from the lower memory address is put into the lower numbered
21670 register, and the value from the higher address is put into the
21671 higher numbered register, the load will work regardless of whether
21672 the value being loaded is big-wordian or little-wordian. The
21673 order of the two register loads can matter however, if the address
21674 of the memory location is actually held in one of the registers
21675 being overwritten by the load.
21677 The 'Q' and 'R' constraints are also available for 64-bit
21678 constants. */
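/* Illustrative example (little-endian, WORDS_BIG_ENDIAN false): for a
   DImode value held in {r0, r1}, '%Q' prints r0 (least significant word),
   '%R' prints r1 (most significant word) and '%H' also prints r1, the
   higher-numbered register of the pair.  */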
21679 case 'Q':
21680 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21682 rtx part = gen_lowpart (SImode, x);
21683 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21684 return;
21687 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21689 output_operand_lossage ("invalid operand for code '%c'", code);
21690 return;
21693 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21694 return;
21696 case 'R':
21697 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21699 machine_mode mode = GET_MODE (x);
21700 rtx part;
21702 if (mode == VOIDmode)
21703 mode = DImode;
21704 part = gen_highpart_mode (SImode, mode, x);
21705 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21706 return;
21709 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21711 output_operand_lossage ("invalid operand for code '%c'", code);
21712 return;
21715 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21716 return;
21718 case 'H':
21719 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21721 output_operand_lossage ("invalid operand for code '%c'", code);
21722 return;
21725 asm_fprintf (stream, "%r", REGNO (x) + 1);
21726 return;
21728 case 'J':
21729 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21731 output_operand_lossage ("invalid operand for code '%c'", code);
21732 return;
21735 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21736 return;
21738 case 'K':
21739 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21741 output_operand_lossage ("invalid operand for code '%c'", code);
21742 return;
21745 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21746 return;
21748 case 'm':
21749 asm_fprintf (stream, "%r",
21750 REG_P (XEXP (x, 0))
21751 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21752 return;
21754 case 'M':
21755 asm_fprintf (stream, "{%r-%r}",
21756 REGNO (x),
21757 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21758 return;
21760 /* Like 'M', but writing doubleword vector registers, for use by Neon
21761 insns. */
21762 case 'h':
21764 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21765 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21766 if (numregs == 1)
21767 asm_fprintf (stream, "{d%d}", regno);
21768 else
21769 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21771 return;
21773 case 'd':
21774 /* CONST_TRUE_RTX means always -- that's the default. */
21775 if (x == const_true_rtx)
21776 return;
21778 if (!COMPARISON_P (x))
21780 output_operand_lossage ("invalid operand for code '%c'", code);
21781 return;
21784 fputs (arm_condition_codes[get_arm_condition_code (x)],
21785 stream);
21786 return;
21788 case 'D':
21789 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21790 want to do that. */
21791 if (x == const_true_rtx)
21793 output_operand_lossage ("instruction never executed");
21794 return;
21796 if (!COMPARISON_P (x))
21798 output_operand_lossage ("invalid operand for code '%c'", code);
21799 return;
21802 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21803 (get_arm_condition_code (x))],
21804 stream);
21805 return;
21807 case 's':
21808 case 'V':
21809 case 'W':
21810 case 'X':
21811 case 'Y':
21812 case 'Z':
21813 /* Former Maverick support, removed after GCC-4.7. */
21814 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21815 return;
21817 case 'U':
21818 if (!REG_P (x)
21819 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21820 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21821 /* Bad value for wCG register number. */
21823 output_operand_lossage ("invalid operand for code '%c'", code);
21824 return;
21827 else
21828 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21829 return;
21831 /* Print an iWMMXt control register name. */
21832 case 'w':
21833 if (!CONST_INT_P (x)
21834 || INTVAL (x) < 0
21835 || INTVAL (x) >= 16)
21836 /* Bad value for wC register number. */
21838 output_operand_lossage ("invalid operand for code '%c'", code);
21839 return;
21842 else
21844 static const char * wc_reg_names [16] =
21846 "wCID", "wCon", "wCSSF", "wCASF",
21847 "wC4", "wC5", "wC6", "wC7",
21848 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21849 "wC12", "wC13", "wC14", "wC15"
21852 fputs (wc_reg_names [INTVAL (x)], stream);
21854 return;
21856 /* Print the high single-precision register of a VFP double-precision
21857 register. */
21858 case 'p':
21860 machine_mode mode = GET_MODE (x);
21861 int regno;
21863 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21869 regno = REGNO (x);
21870 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21872 output_operand_lossage ("invalid operand for code '%c'", code);
21873 return;
21876 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21878 return;
21880 /* Print a VFP/Neon double precision or quad precision register name. */
21881 case 'P':
21882 case 'q':
21884 machine_mode mode = GET_MODE (x);
21885 int is_quad = (code == 'q');
21886 int regno;
21888 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21894 if (!REG_P (x)
21895 || !IS_VFP_REGNUM (REGNO (x)))
21897 output_operand_lossage ("invalid operand for code '%c'", code);
21898 return;
21901 regno = REGNO (x);
21902 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21903 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21905 output_operand_lossage ("invalid operand for code '%c'", code);
21906 return;
21909 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21910 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21912 return;
21914 /* These two codes print the low/high doubleword register of a Neon quad
21915 register, respectively. For pair-structure types, can also print
21916 low/high quadword registers. */
21917 case 'e':
21918 case 'f':
21920 machine_mode mode = GET_MODE (x);
21921 int regno;
21923 if ((GET_MODE_SIZE (mode) != 16
21924 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21926 output_operand_lossage ("invalid operand for code '%c'", code);
21927 return;
21930 regno = REGNO (x);
21931 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21933 output_operand_lossage ("invalid operand for code '%c'", code);
21934 return;
21937 if (GET_MODE_SIZE (mode) == 16)
21938 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21939 + (code == 'f' ? 1 : 0));
21940 else
21941 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21942 + (code == 'f' ? 1 : 0));
21944 return;
21946 /* Print a VFPv3 floating-point constant, represented as an integer
21947 index. */
21948 case 'G':
21950 int index = vfp3_const_double_index (x);
21951 gcc_assert (index != -1);
21952 fprintf (stream, "%d", index);
21954 return;
21956 /* Print bits representing opcode features for Neon.
21958 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21959 and polynomials as unsigned.
21961 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21963 Bit 2 is 1 for rounding functions, 0 otherwise. */
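/* Illustrative example: a bits value of 5 (binary 101) describes a signed
   integer operation with rounding, so 'T' prints 's', 'F' prints 'i',
   't' prints 's' and 'O' prints 'r'.  */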
21965 /* Identify the type as 's', 'u', 'p' or 'f'. */
21966 case 'T':
21968 HOST_WIDE_INT bits = INTVAL (x);
21969 fputc ("uspf"[bits & 3], stream);
21971 return;
21973 /* Likewise, but signed and unsigned integers are both 'i'. */
21974 case 'F':
21976 HOST_WIDE_INT bits = INTVAL (x);
21977 fputc ("iipf"[bits & 3], stream);
21979 return;
21981 /* As for 'T', but emit 'u' instead of 'p'. */
21982 case 't':
21984 HOST_WIDE_INT bits = INTVAL (x);
21985 fputc ("usuf"[bits & 3], stream);
21987 return;
21989 /* Bit 2: rounding (vs none). */
21990 case 'O':
21992 HOST_WIDE_INT bits = INTVAL (x);
21993 fputs ((bits & 4) != 0 ? "r" : "", stream);
21995 return;
21997 /* Memory operand for vld1/vst1 instruction. */
21998 case 'A':
22000 rtx addr;
22001 bool postinc = FALSE;
22002 rtx postinc_reg = NULL;
22003 unsigned align, memsize, align_bits;
22005 gcc_assert (MEM_P (x));
22006 addr = XEXP (x, 0);
22007 if (GET_CODE (addr) == POST_INC)
22009 postinc = 1;
22010 addr = XEXP (addr, 0);
22012 if (GET_CODE (addr) == POST_MODIFY)
22014 postinc_reg = XEXP (XEXP (addr, 1), 1);
22015 addr = XEXP (addr, 0);
22017 asm_fprintf (stream, "[%r", REGNO (addr));
22019 /* We know the alignment of this access, so we can emit a hint in the
22020 instruction (for some alignments) as an aid to the memory subsystem
22021 of the target. */
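/* Illustrative example (assuming the base register is r0): a 16-byte
   access known to be 16-byte aligned prints as "[r0:128]", while an
   access with no usable alignment guarantee prints as plain "[r0]".  */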
22022 align = MEM_ALIGN (x) >> 3;
22023 memsize = MEM_SIZE (x);
22025 /* Only certain alignment specifiers are supported by the hardware. */
22026 if (memsize == 32 && (align % 32) == 0)
22027 align_bits = 256;
22028 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22029 align_bits = 128;
22030 else if (memsize >= 8 && (align % 8) == 0)
22031 align_bits = 64;
22032 else
22033 align_bits = 0;
22035 if (align_bits != 0)
22036 asm_fprintf (stream, ":%d", align_bits);
22038 asm_fprintf (stream, "]");
22040 if (postinc)
22041 fputs ("!", stream);
22042 if (postinc_reg)
22043 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22045 return;
22047 case 'C':
22049 rtx addr;
22051 gcc_assert (MEM_P (x));
22052 addr = XEXP (x, 0);
22053 gcc_assert (REG_P (addr));
22054 asm_fprintf (stream, "[%r]", REGNO (addr));
22056 return;
22058 /* Translate an S register number into a D register number and element index. */
22059 case 'y':
22061 machine_mode mode = GET_MODE (x);
22062 int regno;
22064 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22070 regno = REGNO (x);
22071 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22073 output_operand_lossage ("invalid operand for code '%c'", code);
22074 return;
22077 regno = regno - FIRST_VFP_REGNUM;
22078 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22080 return;
22082 case 'v':
22083 gcc_assert (CONST_DOUBLE_P (x));
22084 int result;
22085 result = vfp3_const_double_for_fract_bits (x);
22086 if (result == 0)
22087 result = vfp3_const_double_for_bits (x);
22088 fprintf (stream, "#%d", result);
22089 return;
22091 /* Register specifier for vld1.16/vst1.16. Translate the S register
22092 number into a D register number and element index. */
22093 case 'z':
22095 machine_mode mode = GET_MODE (x);
22096 int regno;
22098 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22100 output_operand_lossage ("invalid operand for code '%c'", code);
22101 return;
22104 regno = REGNO (x);
22105 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22107 output_operand_lossage ("invalid operand for code '%c'", code);
22108 return;
22111 regno = regno - FIRST_VFP_REGNUM;
22112 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22114 return;
22116 default:
22117 if (x == 0)
22119 output_operand_lossage ("missing operand");
22120 return;
22123 switch (GET_CODE (x))
22125 case REG:
22126 asm_fprintf (stream, "%r", REGNO (x));
22127 break;
22129 case MEM:
22130 output_memory_reference_mode = GET_MODE (x);
22131 output_address (XEXP (x, 0));
22132 break;
22134 case CONST_DOUBLE:
22136 char fpstr[20];
22137 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22138 sizeof (fpstr), 0, 1);
22139 fprintf (stream, "#%s", fpstr);
22141 break;
22143 default:
22144 gcc_assert (GET_CODE (x) != NEG);
22145 fputc ('#', stream);
22146 if (GET_CODE (x) == HIGH)
22148 fputs (":lower16:", stream);
22149 x = XEXP (x, 0);
22152 output_addr_const (stream, x);
22153 break;
22158 /* Target hook for printing a memory address. */
22159 static void
22160 arm_print_operand_address (FILE *stream, rtx x)
22162 if (TARGET_32BIT)
22164 int is_minus = GET_CODE (x) == MINUS;
22166 if (REG_P (x))
22167 asm_fprintf (stream, "[%r]", REGNO (x));
22168 else if (GET_CODE (x) == PLUS || is_minus)
22170 rtx base = XEXP (x, 0);
22171 rtx index = XEXP (x, 1);
22172 HOST_WIDE_INT offset = 0;
22173 if (!REG_P (base)
22174 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22176 /* Ensure that BASE is a register. */
22177 /* (one of them must be). */
22178 /* Also ensure the SP is not used as an index register. */
22179 rtx temp = base;
22180 base = index;
22181 index = temp;
22183 switch (GET_CODE (index))
22185 case CONST_INT:
22186 offset = INTVAL (index);
22187 if (is_minus)
22188 offset = -offset;
22189 asm_fprintf (stream, "[%r, #%wd]",
22190 REGNO (base), offset);
22191 break;
22193 case REG:
22194 asm_fprintf (stream, "[%r, %s%r]",
22195 REGNO (base), is_minus ? "-" : "",
22196 REGNO (index));
22197 break;
22199 case MULT:
22200 case ASHIFTRT:
22201 case LSHIFTRT:
22202 case ASHIFT:
22203 case ROTATERT:
22205 asm_fprintf (stream, "[%r, %s%r",
22206 REGNO (base), is_minus ? "-" : "",
22207 REGNO (XEXP (index, 0)));
22208 arm_print_operand (stream, index, 'S');
22209 fputs ("]", stream);
22210 break;
22213 default:
22214 gcc_unreachable ();
22217 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22218 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22220 extern machine_mode output_memory_reference_mode;
22222 gcc_assert (REG_P (XEXP (x, 0)));
22224 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22225 asm_fprintf (stream, "[%r, #%s%d]!",
22226 REGNO (XEXP (x, 0)),
22227 GET_CODE (x) == PRE_DEC ? "-" : "",
22228 GET_MODE_SIZE (output_memory_reference_mode));
22229 else
22230 asm_fprintf (stream, "[%r], #%s%d",
22231 REGNO (XEXP (x, 0)),
22232 GET_CODE (x) == POST_DEC ? "-" : "",
22233 GET_MODE_SIZE (output_memory_reference_mode));
22235 else if (GET_CODE (x) == PRE_MODIFY)
22237 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22238 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22239 asm_fprintf (stream, "#%wd]!",
22240 INTVAL (XEXP (XEXP (x, 1), 1)));
22241 else
22242 asm_fprintf (stream, "%r]!",
22243 REGNO (XEXP (XEXP (x, 1), 1)));
22245 else if (GET_CODE (x) == POST_MODIFY)
22247 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22248 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22249 asm_fprintf (stream, "#%wd",
22250 INTVAL (XEXP (XEXP (x, 1), 1)));
22251 else
22252 asm_fprintf (stream, "%r",
22253 REGNO (XEXP (XEXP (x, 1), 1)));
22255 else output_addr_const (stream, x);
22257 else
22259 if (REG_P (x))
22260 asm_fprintf (stream, "[%r]", REGNO (x));
22261 else if (GET_CODE (x) == POST_INC)
22262 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22263 else if (GET_CODE (x) == PLUS)
22265 gcc_assert (REG_P (XEXP (x, 0)));
22266 if (CONST_INT_P (XEXP (x, 1)))
22267 asm_fprintf (stream, "[%r, #%wd]",
22268 REGNO (XEXP (x, 0)),
22269 INTVAL (XEXP (x, 1)));
22270 else
22271 asm_fprintf (stream, "[%r, %r]",
22272 REGNO (XEXP (x, 0)),
22273 REGNO (XEXP (x, 1)));
22275 else
22276 output_addr_const (stream, x);
22280 /* Target hook for indicating whether a punctuation character for
22281 TARGET_PRINT_OPERAND is valid. */
22282 static bool
22283 arm_print_operand_punct_valid_p (unsigned char code)
22285 return (code == '@' || code == '|' || code == '.'
22286 || code == '(' || code == ')' || code == '#'
22287 || (TARGET_32BIT && (code == '?'))
22288 || (TARGET_THUMB2 && (code == '!'))
22289 || (TARGET_THUMB && (code == '_')));
22292 /* Target hook for assembling integer objects. The ARM version needs to
22293 handle word-sized values specially. */
22294 static bool
22295 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22297 machine_mode mode;
22299 if (size == UNITS_PER_WORD && aligned_p)
22301 fputs ("\t.word\t", asm_out_file);
22302 output_addr_const (asm_out_file, x);
22304 /* Mark symbols as position independent. We only do this in the
22305 .text segment, not in the .data segment. */
22306 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22307 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22309 /* See legitimize_pic_address for an explanation of the
22310 TARGET_VXWORKS_RTP check. */
22311 if (!arm_pic_data_is_text_relative
22312 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22313 fputs ("(GOT)", asm_out_file);
22314 else
22315 fputs ("(GOTOFF)", asm_out_file);
22317 fputc ('\n', asm_out_file);
22318 return true;
22321 mode = GET_MODE (x);
22323 if (arm_vector_mode_supported_p (mode))
22325 int i, units;
22327 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22329 units = CONST_VECTOR_NUNITS (x);
22330 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22332 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22333 for (i = 0; i < units; i++)
22335 rtx elt = CONST_VECTOR_ELT (x, i);
22336 assemble_integer
22337 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22339 else
22340 for (i = 0; i < units; i++)
22342 rtx elt = CONST_VECTOR_ELT (x, i);
22343 REAL_VALUE_TYPE rval;
22345 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22347 assemble_real
22348 (rval, GET_MODE_INNER (mode),
22349 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22352 return true;
22355 return default_assemble_integer (x, size, aligned_p);
22358 static void
22359 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22361 section *s;
22363 if (!TARGET_AAPCS_BASED)
22365 (is_ctor ?
22366 default_named_section_asm_out_constructor
22367 : default_named_section_asm_out_destructor) (symbol, priority);
22368 return;
22371 /* Put these in the .init_array section, using a special relocation. */
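/* Illustrative example: a constructor with priority 101 ends up in a
   section named ".init_array.00101"; the "%.5u" below zero-pads the
   priority to five digits.  */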
22372 if (priority != DEFAULT_INIT_PRIORITY)
22374 char buf[18];
22375 sprintf (buf, "%s.%.5u",
22376 is_ctor ? ".init_array" : ".fini_array",
22377 priority);
22378 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22380 else if (is_ctor)
22381 s = ctors_section;
22382 else
22383 s = dtors_section;
22385 switch_to_section (s);
22386 assemble_align (POINTER_SIZE);
22387 fputs ("\t.word\t", asm_out_file);
22388 output_addr_const (asm_out_file, symbol);
22389 fputs ("(target1)\n", asm_out_file);
22392 /* Add a function to the list of static constructors. */
22394 static void
22395 arm_elf_asm_constructor (rtx symbol, int priority)
22397 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22400 /* Add a function to the list of static destructors. */
22402 static void
22403 arm_elf_asm_destructor (rtx symbol, int priority)
22405 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22408 /* A finite state machine takes care of noticing whether or not instructions
22409 can be conditionally executed, and thus decreases execution time and code
22410 size by deleting branch instructions. The fsm is controlled by
22411 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22413 /* The states of the fsm controlling condition codes are:
22414 0: normal, do nothing special
22415 1: make ASM_OUTPUT_OPCODE not output this instruction
22416 2: make ASM_OUTPUT_OPCODE not output this instruction
22417 3: make instructions conditional
22418 4: make instructions conditional
22420 State transitions (state->state by whom under condition):
22421 0 -> 1 final_prescan_insn if the `target' is a label
22422 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22423 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22424 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22425 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22426 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22427 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22428 (the target insn is arm_target_insn).
22430 If the jump clobbers the conditions then we use states 2 and 4.
22432 A similar thing can be done with conditional return insns.
22434 XXX In case the `target' is an unconditional branch, this conditionalising
22435 of the instructions always reduces code size, but not always execution
22436 time. But then, I want to reduce the code size to somewhere near what
22437 /bin/cc produces. */
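/* Illustrative example (ARM state) of the transformation this enables:

       cmp   r0, #0              cmp    r0, #0
       beq   .L1          ==>    addne  r1, r1, #1
       add   r1, r1, #1
   .L1:

   The conditional branch and its label disappear and the skipped
   instruction is predicated on the inverse condition.  */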
22439 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22440 instructions. When a COND_EXEC instruction is seen the subsequent
22441 instructions are scanned so that multiple conditional instructions can be
22442 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22443 specify the length and true/false mask for the IT block. These will be
22444 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
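/* Illustrative example (assuming each COND_EXEC expands to a single
   conditional instruction): three insns with conditions eq, eq and ne
   form one block with arm_condexec_mask == 0x3 (bits 0 and 1 set) and
   arm_condexec_masklen == 3, and thumb2_asm_output_opcode emits
   "itte eq" in front of them.  */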
22446 /* Returns the index of the ARM condition code string in
22447 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22448 COMPARISON should be an rtx like `(eq (...) (...))'. */
22450 enum arm_cond_code
22451 maybe_get_arm_condition_code (rtx comparison)
22453 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22454 enum arm_cond_code code;
22455 enum rtx_code comp_code = GET_CODE (comparison);
22457 if (GET_MODE_CLASS (mode) != MODE_CC)
22458 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22459 XEXP (comparison, 1));
22461 switch (mode)
22463 case CC_DNEmode: code = ARM_NE; goto dominance;
22464 case CC_DEQmode: code = ARM_EQ; goto dominance;
22465 case CC_DGEmode: code = ARM_GE; goto dominance;
22466 case CC_DGTmode: code = ARM_GT; goto dominance;
22467 case CC_DLEmode: code = ARM_LE; goto dominance;
22468 case CC_DLTmode: code = ARM_LT; goto dominance;
22469 case CC_DGEUmode: code = ARM_CS; goto dominance;
22470 case CC_DGTUmode: code = ARM_HI; goto dominance;
22471 case CC_DLEUmode: code = ARM_LS; goto dominance;
22472 case CC_DLTUmode: code = ARM_CC;
22474 dominance:
22475 if (comp_code == EQ)
22476 return ARM_INVERSE_CONDITION_CODE (code);
22477 if (comp_code == NE)
22478 return code;
22479 return ARM_NV;
22481 case CC_NOOVmode:
22482 switch (comp_code)
22484 case NE: return ARM_NE;
22485 case EQ: return ARM_EQ;
22486 case GE: return ARM_PL;
22487 case LT: return ARM_MI;
22488 default: return ARM_NV;
22491 case CC_Zmode:
22492 switch (comp_code)
22494 case NE: return ARM_NE;
22495 case EQ: return ARM_EQ;
22496 default: return ARM_NV;
22499 case CC_Nmode:
22500 switch (comp_code)
22502 case NE: return ARM_MI;
22503 case EQ: return ARM_PL;
22504 default: return ARM_NV;
22507 case CCFPEmode:
22508 case CCFPmode:
22509 /* We can handle all cases except UNEQ and LTGT. */
22510 switch (comp_code)
22512 case GE: return ARM_GE;
22513 case GT: return ARM_GT;
22514 case LE: return ARM_LS;
22515 case LT: return ARM_MI;
22516 case NE: return ARM_NE;
22517 case EQ: return ARM_EQ;
22518 case ORDERED: return ARM_VC;
22519 case UNORDERED: return ARM_VS;
22520 case UNLT: return ARM_LT;
22521 case UNLE: return ARM_LE;
22522 case UNGT: return ARM_HI;
22523 case UNGE: return ARM_PL;
22524 /* UNEQ and LTGT do not have a representation. */
22525 case UNEQ: /* Fall through. */
22526 case LTGT: /* Fall through. */
22527 default: return ARM_NV;
22530 case CC_SWPmode:
22531 switch (comp_code)
22533 case NE: return ARM_NE;
22534 case EQ: return ARM_EQ;
22535 case GE: return ARM_LE;
22536 case GT: return ARM_LT;
22537 case LE: return ARM_GE;
22538 case LT: return ARM_GT;
22539 case GEU: return ARM_LS;
22540 case GTU: return ARM_CC;
22541 case LEU: return ARM_CS;
22542 case LTU: return ARM_HI;
22543 default: return ARM_NV;
22546 case CC_Cmode:
22547 switch (comp_code)
22549 case LTU: return ARM_CS;
22550 case GEU: return ARM_CC;
22551 default: return ARM_NV;
22554 case CC_CZmode:
22555 switch (comp_code)
22557 case NE: return ARM_NE;
22558 case EQ: return ARM_EQ;
22559 case GEU: return ARM_CS;
22560 case GTU: return ARM_HI;
22561 case LEU: return ARM_LS;
22562 case LTU: return ARM_CC;
22563 default: return ARM_NV;
22566 case CC_NCVmode:
22567 switch (comp_code)
22569 case GE: return ARM_GE;
22570 case LT: return ARM_LT;
22571 case GEU: return ARM_CS;
22572 case LTU: return ARM_CC;
22573 default: return ARM_NV;
22576 case CCmode:
22577 switch (comp_code)
22579 case NE: return ARM_NE;
22580 case EQ: return ARM_EQ;
22581 case GE: return ARM_GE;
22582 case GT: return ARM_GT;
22583 case LE: return ARM_LE;
22584 case LT: return ARM_LT;
22585 case GEU: return ARM_CS;
22586 case GTU: return ARM_HI;
22587 case LEU: return ARM_LS;
22588 case LTU: return ARM_CC;
22589 default: return ARM_NV;
22592 default: gcc_unreachable ();
22596 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22597 static enum arm_cond_code
22598 get_arm_condition_code (rtx comparison)
22600 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22601 gcc_assert (code != ARM_NV);
22602 return code;
22605 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22606 instructions. */
22607 void
22608 thumb2_final_prescan_insn (rtx_insn *insn)
22610 rtx_insn *first_insn = insn;
22611 rtx body = PATTERN (insn);
22612 rtx predicate;
22613 enum arm_cond_code code;
22614 int n;
22615 int mask;
22616 int max;
22618 /* max_insns_skipped in the tune was already taken into account in the
22619 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22620 just emit the IT blocks as large as we can. It does not make sense to split
22621 the IT blocks. */
22622 max = MAX_INSN_PER_IT_BLOCK;
22624 /* Remove the previous insn from the count of insns to be output. */
22625 if (arm_condexec_count)
22626 arm_condexec_count--;
22628 /* Nothing to do if we are already inside a conditional block. */
22629 if (arm_condexec_count)
22630 return;
22632 if (GET_CODE (body) != COND_EXEC)
22633 return;
22635 /* Conditional jumps are implemented directly. */
22636 if (JUMP_P (insn))
22637 return;
22639 predicate = COND_EXEC_TEST (body);
22640 arm_current_cc = get_arm_condition_code (predicate);
22642 n = get_attr_ce_count (insn);
22643 arm_condexec_count = 1;
22644 arm_condexec_mask = (1 << n) - 1;
22645 arm_condexec_masklen = n;
22646 /* See if subsequent instructions can be combined into the same block. */
22647 for (;;)
22649 insn = next_nonnote_insn (insn);
22651 /* Jumping into the middle of an IT block is illegal, so a label or
22652 barrier terminates the block. */
22653 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22654 break;
22656 body = PATTERN (insn);
22657 /* USE and CLOBBER aren't really insns, so just skip them. */
22658 if (GET_CODE (body) == USE
22659 || GET_CODE (body) == CLOBBER)
22660 continue;
22662 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22663 if (GET_CODE (body) != COND_EXEC)
22664 break;
22665 /* Maximum number of conditionally executed instructions in a block. */
22666 n = get_attr_ce_count (insn);
22667 if (arm_condexec_masklen + n > max)
22668 break;
22670 predicate = COND_EXEC_TEST (body);
22671 code = get_arm_condition_code (predicate);
22672 mask = (1 << n) - 1;
22673 if (arm_current_cc == code)
22674 arm_condexec_mask |= (mask << arm_condexec_masklen);
22675 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22676 break;
22678 arm_condexec_count++;
22679 arm_condexec_masklen += n;
22681 /* A jump must be the last instruction in a conditional block. */
22682 if (JUMP_P (insn))
22683 break;
22685 /* Restore recog_data (getting the attributes of other insns can
22686 destroy this array, but final.c assumes that it remains intact
22687 across this call). */
22688 extract_constrain_insn_cached (first_insn);
22691 void
22692 arm_final_prescan_insn (rtx_insn *insn)
22694 /* BODY will hold the body of INSN. */
22695 rtx body = PATTERN (insn);
22697 /* This will be 1 if trying to repeat the trick, and things need to be
22698 reversed if it appears to fail. */
22699 int reverse = 0;
22701 /* If we start with a return insn, we only succeed if we find another one. */
22702 int seeking_return = 0;
22703 enum rtx_code return_code = UNKNOWN;
22705 /* START_INSN will hold the insn from where we start looking. This is the
22706 first insn after the following code_label if REVERSE is true. */
22707 rtx_insn *start_insn = insn;
22709 /* If in state 4, check if the target branch is reached, in order to
22710 change back to state 0. */
22711 if (arm_ccfsm_state == 4)
22713 if (insn == arm_target_insn)
22715 arm_target_insn = NULL;
22716 arm_ccfsm_state = 0;
22718 return;
22721 /* If in state 3, it is possible to repeat the trick, if this insn is an
22722 unconditional branch to a label, and immediately following this branch
22723 is the previous target label which is only used once, and the label this
22724 branch jumps to is not too far off. */
22725 if (arm_ccfsm_state == 3)
22727 if (simplejump_p (insn))
22729 start_insn = next_nonnote_insn (start_insn);
22730 if (BARRIER_P (start_insn))
22732 /* XXX Isn't this always a barrier? */
22733 start_insn = next_nonnote_insn (start_insn);
22735 if (LABEL_P (start_insn)
22736 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22737 && LABEL_NUSES (start_insn) == 1)
22738 reverse = TRUE;
22739 else
22740 return;
22742 else if (ANY_RETURN_P (body))
22744 start_insn = next_nonnote_insn (start_insn);
22745 if (BARRIER_P (start_insn))
22746 start_insn = next_nonnote_insn (start_insn);
22747 if (LABEL_P (start_insn)
22748 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22749 && LABEL_NUSES (start_insn) == 1)
22751 reverse = TRUE;
22752 seeking_return = 1;
22753 return_code = GET_CODE (body);
22755 else
22756 return;
22758 else
22759 return;
22762 gcc_assert (!arm_ccfsm_state || reverse);
22763 if (!JUMP_P (insn))
22764 return;
22766 /* This jump might be paralleled with a clobber of the condition codes;
22767 the jump should always come first. */
22768 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22769 body = XVECEXP (body, 0, 0);
22771 if (reverse
22772 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22773 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22775 int insns_skipped;
22776 int fail = FALSE, succeed = FALSE;
22777 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22778 int then_not_else = TRUE;
22779 rtx_insn *this_insn = start_insn;
22780 rtx label = 0;
22782 /* Register the insn jumped to. */
22783 if (reverse)
22785 if (!seeking_return)
22786 label = XEXP (SET_SRC (body), 0);
22788 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22789 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22790 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22792 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22793 then_not_else = FALSE;
22795 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22797 seeking_return = 1;
22798 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22800 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22802 seeking_return = 1;
22803 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22804 then_not_else = FALSE;
22806 else
22807 gcc_unreachable ();
22809 /* See how many insns this branch skips, and what kind of insns. If all
22810 insns are okay, and the label or unconditional branch to the same
22811 label is not too far away, succeed. */
22812 for (insns_skipped = 0;
22813 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22815 rtx scanbody;
22817 this_insn = next_nonnote_insn (this_insn);
22818 if (!this_insn)
22819 break;
22821 switch (GET_CODE (this_insn))
22823 case CODE_LABEL:
22824 /* Succeed if it is the target label, otherwise fail since
22825 control falls in from somewhere else. */
22826 if (this_insn == label)
22828 arm_ccfsm_state = 1;
22829 succeed = TRUE;
22831 else
22832 fail = TRUE;
22833 break;
22835 case BARRIER:
22836 /* Succeed if the following insn is the target label.
22837 Otherwise fail.
22838 If return insns are used then the last insn in a function
22839 will be a barrier. */
22840 this_insn = next_nonnote_insn (this_insn);
22841 if (this_insn && this_insn == label)
22843 arm_ccfsm_state = 1;
22844 succeed = TRUE;
22846 else
22847 fail = TRUE;
22848 break;
22850 case CALL_INSN:
22851 /* The AAPCS says that conditional calls should not be
22852 used since they make interworking inefficient (the
22853 linker can't transform BL<cond> into BLX). That's
22854 only a problem if the machine has BLX. */
22855 if (arm_arch5)
22857 fail = TRUE;
22858 break;
22861 /* Succeed if the following insn is the target label, or
22862 if the following two insns are a barrier and the
22863 target label. */
22864 this_insn = next_nonnote_insn (this_insn);
22865 if (this_insn && BARRIER_P (this_insn))
22866 this_insn = next_nonnote_insn (this_insn);
22868 if (this_insn && this_insn == label
22869 && insns_skipped < max_insns_skipped)
22871 arm_ccfsm_state = 1;
22872 succeed = TRUE;
22874 else
22875 fail = TRUE;
22876 break;
22878 case JUMP_INSN:
22879 /* If this is an unconditional branch to the same label, succeed.
22880 If it is to another label, do nothing. If it is conditional,
22881 fail. */
22882 /* XXX Probably, the tests for SET and the PC are
22883 unnecessary. */
22885 scanbody = PATTERN (this_insn);
22886 if (GET_CODE (scanbody) == SET
22887 && GET_CODE (SET_DEST (scanbody)) == PC)
22889 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22890 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22892 arm_ccfsm_state = 2;
22893 succeed = TRUE;
22895 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22896 fail = TRUE;
22898 /* Fail if a conditional return is undesirable (e.g. on a
22899 StrongARM), but still allow this if optimizing for size. */
22900 else if (GET_CODE (scanbody) == return_code
22901 && !use_return_insn (TRUE, NULL)
22902 && !optimize_size)
22903 fail = TRUE;
22904 else if (GET_CODE (scanbody) == return_code)
22906 arm_ccfsm_state = 2;
22907 succeed = TRUE;
22909 else if (GET_CODE (scanbody) == PARALLEL)
22911 switch (get_attr_conds (this_insn))
22913 case CONDS_NOCOND:
22914 break;
22915 default:
22916 fail = TRUE;
22917 break;
22920 else
22921 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22923 break;
22925 case INSN:
22926 /* Instructions using or affecting the condition codes make it
22927 fail. */
22928 scanbody = PATTERN (this_insn);
22929 if (!(GET_CODE (scanbody) == SET
22930 || GET_CODE (scanbody) == PARALLEL)
22931 || get_attr_conds (this_insn) != CONDS_NOCOND)
22932 fail = TRUE;
22933 break;
22935 default:
22936 break;
22939 if (succeed)
22941 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22942 arm_target_label = CODE_LABEL_NUMBER (label);
22943 else
22945 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22947 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22949 this_insn = next_nonnote_insn (this_insn);
22950 gcc_assert (!this_insn
22951 || (!BARRIER_P (this_insn)
22952 && !LABEL_P (this_insn)));
22954 if (!this_insn)
22956 /* Oh, dear! We ran off the end; give up. */
22957 extract_constrain_insn_cached (insn);
22958 arm_ccfsm_state = 0;
22959 arm_target_insn = NULL;
22960 return;
22962 arm_target_insn = this_insn;
22965 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22966 what it was. */
22967 if (!reverse)
22968 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22970 if (reverse || then_not_else)
22971 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22974 /* Restore recog_data (getting the attributes of other insns can
22975 destroy this array, but final.c assumes that it remains intact
22976 across this call). */
22977 extract_constrain_insn_cached (insn);
22981 /* Output IT instructions. */
22982 void
22983 thumb2_asm_output_opcode (FILE * stream)
22985 char buff[5];
22986 int n;
22988 if (arm_condexec_mask)
22990 for (n = 0; n < arm_condexec_masklen; n++)
22991 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22992 buff[n] = 0;
22993 asm_fprintf (stream, "i%s\t%s\n\t", buff,
22994 arm_condition_codes[arm_current_cc]);
22995 arm_condexec_mask = 0;
22999 /* Returns true if REGNO is a valid register
23000 for holding a quantity of type MODE. */
23001 int
23002 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23004 if (GET_MODE_CLASS (mode) == MODE_CC)
23005 return (regno == CC_REGNUM
23006 || (TARGET_HARD_FLOAT && TARGET_VFP
23007 && regno == VFPCC_REGNUM));
23009 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23010 return false;
23012 if (TARGET_THUMB1)
23013 /* For the Thumb we only allow values bigger than SImode in
23014 registers 0 - 6, so that there is always a second low
23015 register available to hold the upper part of the value.
23016 We probably ought to ensure that the register is the
23017 start of an even numbered register pair. */
23018 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23020 if (TARGET_HARD_FLOAT && TARGET_VFP
23021 && IS_VFP_REGNUM (regno))
23023 if (mode == SFmode || mode == SImode)
23024 return VFP_REGNO_OK_FOR_SINGLE (regno);
23026 if (mode == DFmode)
23027 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23029 /* VFP registers can hold HFmode values, but there is no point in
23030 putting them there unless we have hardware conversion insns. */
23031 if (mode == HFmode)
23032 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23034 if (TARGET_NEON)
23035 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23036 || (VALID_NEON_QREG_MODE (mode)
23037 && NEON_REGNO_OK_FOR_QUAD (regno))
23038 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23039 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23040 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23041 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23042 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23044 return FALSE;
23047 if (TARGET_REALLY_IWMMXT)
23049 if (IS_IWMMXT_GR_REGNUM (regno))
23050 return mode == SImode;
23052 if (IS_IWMMXT_REGNUM (regno))
23053 return VALID_IWMMXT_REG_MODE (mode);
23056 /* We allow almost any value to be stored in the general registers.
23057 Restrict doubleword quantities to even register pairs in ARM state
23058 so that we can use ldrd. Do not allow very large Neon structure
23059 opaque modes in general registers; they would use too many. */
23060 if (regno <= LAST_ARM_REGNUM)
23062 if (ARM_NUM_REGS (mode) > 4)
23063 return FALSE;
23065 if (TARGET_THUMB2)
23066 return TRUE;
23068 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23071 if (regno == FRAME_POINTER_REGNUM
23072 || regno == ARG_POINTER_REGNUM)
23073 /* We only allow integers in the fake hard registers. */
23074 return GET_MODE_CLASS (mode) == MODE_INT;
23076 return FALSE;
23079 /* Implement MODES_TIEABLE_P. */
23081 bool
23082 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23084 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23085 return true;
23087 /* We specifically want to allow elements of "structure" modes to
23088 be tieable to the structure. This more general condition allows
23089 other rarer situations too. */
23090 if (TARGET_NEON
23091 && (VALID_NEON_DREG_MODE (mode1)
23092 || VALID_NEON_QREG_MODE (mode1)
23093 || VALID_NEON_STRUCT_MODE (mode1))
23094 && (VALID_NEON_DREG_MODE (mode2)
23095 || VALID_NEON_QREG_MODE (mode2)
23096 || VALID_NEON_STRUCT_MODE (mode2)))
23097 return true;
23099 return false;
23102 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23103 not used in arm mode. */
23105 enum reg_class
23106 arm_regno_class (int regno)
23108 if (regno == PC_REGNUM)
23109 return NO_REGS;
23111 if (TARGET_THUMB1)
23113 if (regno == STACK_POINTER_REGNUM)
23114 return STACK_REG;
23115 if (regno == CC_REGNUM)
23116 return CC_REG;
23117 if (regno < 8)
23118 return LO_REGS;
23119 return HI_REGS;
23122 if (TARGET_THUMB2 && regno < 8)
23123 return LO_REGS;
23125 if ( regno <= LAST_ARM_REGNUM
23126 || regno == FRAME_POINTER_REGNUM
23127 || regno == ARG_POINTER_REGNUM)
23128 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23130 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23131 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23133 if (IS_VFP_REGNUM (regno))
23135 if (regno <= D7_VFP_REGNUM)
23136 return VFP_D0_D7_REGS;
23137 else if (regno <= LAST_LO_VFP_REGNUM)
23138 return VFP_LO_REGS;
23139 else
23140 return VFP_HI_REGS;
23143 if (IS_IWMMXT_REGNUM (regno))
23144 return IWMMXT_REGS;
23146 if (IS_IWMMXT_GR_REGNUM (regno))
23147 return IWMMXT_GR_REGS;
23149 return NO_REGS;
23152 /* Handle a special case when computing the offset
23153 of an argument from the frame pointer. */
23154 int
23155 arm_debugger_arg_offset (int value, rtx addr)
23157 rtx_insn *insn;
23159 /* We are only interested if dbxout_parms() failed to compute the offset. */
23160 if (value != 0)
23161 return 0;
23163 /* We can only cope with the case where the address is held in a register. */
23164 if (!REG_P (addr))
23165 return 0;
23167 /* If we are using the frame pointer to point at the argument, then
23168 an offset of 0 is correct. */
23169 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23170 return 0;
23172 /* If we are using the stack pointer to point at the
23173 argument, then an offset of 0 is correct. */
23174 /* ??? Check this is consistent with thumb2 frame layout. */
23175 if ((TARGET_THUMB || !frame_pointer_needed)
23176 && REGNO (addr) == SP_REGNUM)
23177 return 0;
23179 /* Oh dear. The argument is pointed to by a register rather
23180 than being held in a register, or being stored at a known
23181 offset from the frame pointer. Since GDB only understands
23182 those two kinds of argument we must translate the address
23183 held in the register into an offset from the frame pointer.
23184 We do this by searching through the insns for the function
23185 looking to see where this register gets its value. If the
23186 register is initialized from the frame pointer plus an offset
23187 then we are in luck and we can continue, otherwise we give up.
23189 This code is exercised by producing debugging information
23190 for a function with arguments like this:
23192 double func (double a, double b, int c, double d) {return d;}
23194 Without this code the stab for parameter 'd' will be set to
23195 an offset of 0 from the frame pointer, rather than 8. */
23197 /* The if() statement says:
23199 If the insn is a normal instruction
23200 and if the insn is setting the value in a register
23201 and if the register being set is the register holding the address of the argument
23202 and if the address is computed by an addition
23203 that involves adding to a register
23204 which is the frame pointer
23205 a constant integer
23207 then... */
23209 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23211 if ( NONJUMP_INSN_P (insn)
23212 && GET_CODE (PATTERN (insn)) == SET
23213 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23214 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23215 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23216 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23217 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23220 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23222 break;
23226 if (value == 0)
23228 debug_rtx (addr);
23229 warning (0, "unable to compute real location of stacked parameter");
23230 value = 8; /* XXX magic hack */
23233 return value;
23236 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23238 static const char *
23239 arm_invalid_parameter_type (const_tree t)
23241 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23242 return N_("function parameters cannot have __fp16 type");
23243 return NULL;
23246 /* Implement TARGET_INVALID_RETURN_TYPE. */
23248 static const char *
23249 arm_invalid_return_type (const_tree t)
23251 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23252 return N_("functions cannot return __fp16 type");
23253 return NULL;
23256 /* Implement TARGET_PROMOTED_TYPE. */
23258 static tree
23259 arm_promoted_type (const_tree t)
23261 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23262 return float_type_node;
23263 return NULL_TREE;
23266 /* Implement TARGET_CONVERT_TO_TYPE.
23267 Specifically, this hook implements the peculiarity of the ARM
23268 half-precision floating-point C semantics that requires conversions between
23269 __fp16 to or from double to do an intermediate conversion to float. */
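/* Illustrative example: with __fp16 h and double d, "d = h" is compiled
   as if written "d = (double) (float) h", and "h = d" as
   "h = (__fp16) (float) d".  */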
23271 static tree
23272 arm_convert_to_type (tree type, tree expr)
23274 tree fromtype = TREE_TYPE (expr);
23275 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23276 return NULL_TREE;
23277 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23278 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23279 return convert (type, convert (float_type_node, expr));
23280 return NULL_TREE;
23283 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23284 This simply adds HFmode as a supported mode; even though we don't
23285 implement arithmetic on this type directly, it's supported by
23286 optabs conversions, much the way the double-word arithmetic is
23287 special-cased in the default hook. */
23289 static bool
23290 arm_scalar_mode_supported_p (machine_mode mode)
23292 if (mode == HFmode)
23293 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23294 else if (ALL_FIXED_POINT_MODE_P (mode))
23295 return true;
23296 else
23297 return default_scalar_mode_supported_p (mode);
23300 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23301 void
23302 neon_reinterpret (rtx dest, rtx src)
23304 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23307 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23308 not to early-clobber SRC registers in the process.
23310 We assume that the operands described by SRC and DEST represent a
23311 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23312 number of components into which the copy has been decomposed. */
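/* Illustrative example: copying {d1, d2} into {d0, d1} is safe in source
   order (d0 <- d1, then d1 <- d2), but copying {d0, d1} into {d1, d2}
   must be done in reverse (d2 <- d1, then d1 <- d0) so that d1 is not
   clobbered before it is read.  */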
23313 void
23314 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23316 unsigned int i;
23318 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23319 || REGNO (operands[0]) < REGNO (operands[1]))
23321 for (i = 0; i < count; i++)
23323 operands[2 * i] = dest[i];
23324 operands[2 * i + 1] = src[i];
23327 else
23329 for (i = 0; i < count; i++)
23331 operands[2 * i] = dest[count - i - 1];
23332 operands[2 * i + 1] = src[count - i - 1];
23337 /* Split operands into moves from op[1] + op[2] into op[0]. */
23339 void
23340 neon_split_vcombine (rtx operands[3])
23342 unsigned int dest = REGNO (operands[0]);
23343 unsigned int src1 = REGNO (operands[1]);
23344 unsigned int src2 = REGNO (operands[2]);
23345 machine_mode halfmode = GET_MODE (operands[1]);
23346 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23347 rtx destlo, desthi;
23349 if (src1 == dest && src2 == dest + halfregs)
23351 /* No-op move. Can't split to nothing; emit something. */
23352 emit_note (NOTE_INSN_DELETED);
23353 return;
23356 /* Preserve register attributes for variable tracking. */
23357 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23358 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23359 GET_MODE_SIZE (halfmode));
23361 /* Special case of reversed high/low parts. Use VSWP. */
23362 if (src2 == dest && src1 == dest + halfregs)
23364 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23365 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23366 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23367 return;
23370 if (!reg_overlap_mentioned_p (operands[2], destlo))
23372 /* Try to avoid unnecessary moves if part of the result
23373 is in the right place already. */
23374 if (src1 != dest)
23375 emit_move_insn (destlo, operands[1]);
23376 if (src2 != dest + halfregs)
23377 emit_move_insn (desthi, operands[2]);
23379 else
23381 if (src2 != dest + halfregs)
23382 emit_move_insn (desthi, operands[2]);
23383 if (src1 != dest)
23384 emit_move_insn (destlo, operands[1]);
23388 /* Return the number (counting from 0) of
23389 the least significant set bit in MASK. */
23391 inline static int
23392 number_of_first_bit_set (unsigned mask)
23394 return ctz_hwi (mask);
23397 /* Like emit_multi_reg_push, but allowing for a different set of
23398 registers to be described as saved. MASK is the set of registers
23399 to be saved; REAL_REGS is the set of registers to be described as
23400 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23402 static rtx_insn *
23403 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23405 unsigned long regno;
23406 rtx par[10], tmp, reg;
23407 rtx_insn *insn;
23408 int i, j;
23410 /* Build the parallel of the registers actually being stored. */
23411 for (i = 0; mask; ++i, mask &= mask - 1)
23413 regno = ctz_hwi (mask);
23414 reg = gen_rtx_REG (SImode, regno);
23416 if (i == 0)
23417 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23418 else
23419 tmp = gen_rtx_USE (VOIDmode, reg);
23421 par[i] = tmp;
23424 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23425 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23426 tmp = gen_frame_mem (BLKmode, tmp);
23427 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23428 par[0] = tmp;
23430 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23431 insn = emit_insn (tmp);
23433 /* Always build the stack adjustment note for unwind info. */
23434 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23435 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23436 par[0] = tmp;
23438 /* Build the parallel of the registers recorded as saved for unwind. */
23439 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23441 regno = ctz_hwi (real_regs);
23442 reg = gen_rtx_REG (SImode, regno);
23444 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23445 tmp = gen_frame_mem (SImode, tmp);
23446 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23447 RTX_FRAME_RELATED_P (tmp) = 1;
23448 par[j + 1] = tmp;
23451 if (j == 0)
23452 tmp = par[0];
23453 else
23455 RTX_FRAME_RELATED_P (par[0]) = 1;
23456 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23459 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23461 return insn;
23464 /* Emit code to push or pop registers to or from the stack. F is the
23465 assembly file. MASK is the registers to pop. */
23466 static void
23467 thumb_pop (FILE *f, unsigned long mask)
23469 int regno;
23470 int lo_mask = mask & 0xFF;
23471 int pushed_words = 0;
23473 gcc_assert (mask);
23475 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23477 /* Special case. Do not generate a POP PC statement here; do it in
23478 thumb_exit(). */
23479 thumb_exit (f, -1);
23480 return;
23483 fprintf (f, "\tpop\t{");
23485 /* Look at the low registers first. */
23486 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23488 if (lo_mask & 1)
23490 asm_fprintf (f, "%r", regno);
23492 if ((lo_mask & ~1) != 0)
23493 fprintf (f, ", ");
23495 pushed_words++;
23499 if (mask & (1 << PC_REGNUM))
23501 /* Catch popping the PC. */
23502 if (TARGET_INTERWORK || TARGET_BACKTRACE
23503 || crtl->calls_eh_return)
23505 /* The PC is never popped directly; instead
23506 it is popped into r3 and then BX is used. */
23507 fprintf (f, "}\n");
23509 thumb_exit (f, -1);
23511 return;
23513 else
23515 if (mask & 0xFF)
23516 fprintf (f, ", ");
23518 asm_fprintf (f, "%r", PC_REGNUM);
23522 fprintf (f, "}\n");
23525 /* Generate code to return from a thumb function.
23526 If 'reg_containing_return_addr' is -1, then the return address is
23527 actually on the stack, at the stack pointer. */
23528 static void
23529 thumb_exit (FILE *f, int reg_containing_return_addr)
23531 unsigned regs_available_for_popping;
23532 unsigned regs_to_pop;
23533 int pops_needed;
23534 unsigned available;
23535 unsigned required;
23536 machine_mode mode;
23537 int size;
23538 int restore_a4 = FALSE;
23540 /* Compute the registers we need to pop. */
23541 regs_to_pop = 0;
23542 pops_needed = 0;
23544 if (reg_containing_return_addr == -1)
23546 regs_to_pop |= 1 << LR_REGNUM;
23547 ++pops_needed;
23550 if (TARGET_BACKTRACE)
23552 /* Restore the (ARM) frame pointer and stack pointer. */
23553 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23554 pops_needed += 2;
23557 /* If there is nothing to pop then just emit the BX instruction and
23558 return. */
23559 if (pops_needed == 0)
23561 if (crtl->calls_eh_return)
23562 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23564 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23565 return;
23567 /* Otherwise if we are not supporting interworking and we have not created
23568 a backtrace structure and the function was not entered in ARM mode then
23569 just pop the return address straight into the PC. */
23570 else if (!TARGET_INTERWORK
23571 && !TARGET_BACKTRACE
23572 && !is_called_in_ARM_mode (current_function_decl)
23573 && !crtl->calls_eh_return)
23575 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23576 return;
23579 /* Find out how many of the (return) argument registers we can corrupt. */
23580 regs_available_for_popping = 0;
23582 /* If returning via __builtin_eh_return, the bottom three registers
23583 all contain information needed for the return. */
23584 if (crtl->calls_eh_return)
23585 size = 12;
23586 else
23588 /* We can deduce the registers used from the function's
23589 return value. This is more reliable than examining
23590 df_regs_ever_live_p () because that will be set if the register is
23591 ever used in the function, not just if the register is used
23592 to hold a return value. */
23594 if (crtl->return_rtx != 0)
23595 mode = GET_MODE (crtl->return_rtx);
23596 else
23597 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23599 size = GET_MODE_SIZE (mode);
23601 if (size == 0)
23603 /* In a void function we can use any argument register.
23604 In a function that returns a structure on the stack
23605 we can use the second and third argument registers. */
23606 if (mode == VOIDmode)
23607 regs_available_for_popping =
23608 (1 << ARG_REGISTER (1))
23609 | (1 << ARG_REGISTER (2))
23610 | (1 << ARG_REGISTER (3));
23611 else
23612 regs_available_for_popping =
23613 (1 << ARG_REGISTER (2))
23614 | (1 << ARG_REGISTER (3));
23616 else if (size <= 4)
23617 regs_available_for_popping =
23618 (1 << ARG_REGISTER (2))
23619 | (1 << ARG_REGISTER (3));
23620 else if (size <= 8)
23621 regs_available_for_popping =
23622 (1 << ARG_REGISTER (3));
23625 /* Match registers to be popped with registers into which we pop them. */
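/* Each iteration of the loop below clears the lowest set bit of both
   AVAILABLE and REQUIRED, so POPS_NEEDED is left holding the number of
   required registers for which no popping register was found.  */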
23626 for (available = regs_available_for_popping,
23627 required = regs_to_pop;
23628 required != 0 && available != 0;
23629 available &= ~(available & - available),
23630 required &= ~(required & - required))
23631 -- pops_needed;
23633 /* If we have any popping registers left over, remove them. */
23634 if (available > 0)
23635 regs_available_for_popping &= ~available;
23637 /* Otherwise if we need another popping register we can use
23638 the fourth argument register. */
23639 else if (pops_needed)
23641 /* If we have not found any free argument registers and
23642 reg a4 contains the return address, we must move it. */
23643 if (regs_available_for_popping == 0
23644 && reg_containing_return_addr == LAST_ARG_REGNUM)
23646 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23647 reg_containing_return_addr = LR_REGNUM;
23649 else if (size > 12)
23651 /* Register a4 is being used to hold part of the return value,
23652 but we have dire need of a free, low register. */
23653 restore_a4 = TRUE;
23655 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23658 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23660 /* The fourth argument register is available. */
23661 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23663 --pops_needed;
23667 /* Pop as many registers as we can. */
23668 thumb_pop (f, regs_available_for_popping);
23670 /* Process the registers we popped. */
23671 if (reg_containing_return_addr == -1)
23673 /* The return address was popped into the lowest numbered register. */
23674 regs_to_pop &= ~(1 << LR_REGNUM);
23676 reg_containing_return_addr =
23677 number_of_first_bit_set (regs_available_for_popping);
23679 /* Remove this register from the mask of available registers, so that
23680 the return address will not be corrupted by further pops. */
23681 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23684 /* If we popped other registers then handle them here. */
23685 if (regs_available_for_popping)
23687 int frame_pointer;
23689 /* Work out which register currently contains the frame pointer. */
23690 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23692 /* Move it into the correct place. */
23693 asm_fprintf (f, "\tmov\t%r, %r\n",
23694 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23696 /* (Temporarily) remove it from the mask of popped registers. */
23697 regs_available_for_popping &= ~(1 << frame_pointer);
23698 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23700 if (regs_available_for_popping)
23702 int stack_pointer;
23704 /* We popped the stack pointer as well,
23705 find the register that contains it. */
23706 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23708 /* Move it into the stack register. */
23709 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23711 /* At this point we have popped all necessary registers, so
23712 do not worry about restoring regs_available_for_popping
23713 to its correct value:
23715 assert (pops_needed == 0)
23716 assert (regs_available_for_popping == (1 << frame_pointer))
23717 assert (regs_to_pop == (1 << STACK_POINTER)) */
23719 else
23721 /* Since we have just moved the popped value into the frame
23722 pointer, the popping register is available for reuse, and
23723 we know that we still have the stack pointer left to pop. */
23724 regs_available_for_popping |= (1 << frame_pointer);
23728 /* If we still have registers left on the stack, but we no longer have
23729 any registers into which we can pop them, then we must move the return
23730 address into the link register and make available the register that
23731 contained it. */
23732 if (regs_available_for_popping == 0 && pops_needed > 0)
23734 regs_available_for_popping |= 1 << reg_containing_return_addr;
23736 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23737 reg_containing_return_addr);
23739 reg_containing_return_addr = LR_REGNUM;
23742 /* If we have registers left on the stack then pop some more.
23743 We know that at most we will want to pop FP and SP. */
23744 if (pops_needed > 0)
23746 int popped_into;
23747 int move_to;
23749 thumb_pop (f, regs_available_for_popping);
23751 /* We have popped either FP or SP.
23752 Move whichever one it is into the correct register. */
23753 popped_into = number_of_first_bit_set (regs_available_for_popping);
23754 move_to = number_of_first_bit_set (regs_to_pop);
23756 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23758 regs_to_pop &= ~(1 << move_to);
23760 --pops_needed;
23763 /* If we still have not popped everything then we must have only
23764 had one register available to us and we are now popping the SP. */
23765 if (pops_needed > 0)
23767 int popped_into;
23769 thumb_pop (f, regs_available_for_popping);
23771 popped_into = number_of_first_bit_set (regs_available_for_popping);
23773 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23775 assert (regs_to_pop == (1 << STACK_POINTER))
23776 assert (pops_needed == 1)
23780 /* If necessary restore the a4 register. */
23781 if (restore_a4)
23783 if (reg_containing_return_addr != LR_REGNUM)
23785 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23786 reg_containing_return_addr = LR_REGNUM;
23789 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23792 if (crtl->calls_eh_return)
23793 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23795 /* Return to caller. */
23796 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23799 /* Scan INSN just before assembler is output for it.
23800 For Thumb-1, we track the status of the condition codes; this
23801 information is used in the cbranchsi4_insn pattern. */
23802 void
23803 thumb1_final_prescan_insn (rtx_insn *insn)
23805 if (flag_print_asm_name)
23806 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23807 INSN_ADDRESSES (INSN_UID (insn)));
23808 /* Don't overwrite the previous setter when we get to a cbranch. */
23809 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23811 enum attr_conds conds;
23813 if (cfun->machine->thumb1_cc_insn)
23815 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23816 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23817 CC_STATUS_INIT;
23819 conds = get_attr_conds (insn);
23820 if (conds == CONDS_SET)
23822 rtx set = single_set (insn);
23823 cfun->machine->thumb1_cc_insn = insn;
23824 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23825 cfun->machine->thumb1_cc_op1 = const0_rtx;
23826 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23827 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23829 rtx src1 = XEXP (SET_SRC (set), 1);
23830 if (src1 == const0_rtx)
23831 cfun->machine->thumb1_cc_mode = CCmode;
23833 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23835 /* Record the src register operand instead of dest because
23836 cprop_hardreg pass propagates src. */
23837 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23840 else if (conds != CONDS_NOCOND)
23841 cfun->machine->thumb1_cc_insn = NULL_RTX;
23844 /* Check if unexpected far jump is used. */
23845 if (cfun->machine->lr_save_eliminated
23846 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23847 internal_error("Unexpected thumb1 far jump");
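/* Return 1 if VAL, viewed as a 32-bit constant, is nonzero and fits
   entirely within an 8-bit field shifted left by 0 to 24 bits; return 0
   otherwise.  Presumably such a constant can be synthesised as an 8-bit
   move followed by a left shift.  */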
23851 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23853 unsigned HOST_WIDE_INT mask = 0xff;
23854 int i;
23856 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23857 if (val == 0) /* XXX */
23858 return 0;
23860 for (i = 0; i < 25; i++)
23861 if ((val & (mask << i)) == val)
23862 return 1;
23864 return 0;
23867 /* Returns nonzero if the current function contains,
23868 or might contain a far jump. */
23869 static int
23870 thumb_far_jump_used_p (void)
23872 rtx_insn *insn;
23873 bool far_jump = false;
23874 unsigned int func_size = 0;
23876 /* This test is only important for leaf functions. */
23877 /* assert (!leaf_function_p ()); */
23879 /* If we have already decided that far jumps may be used,
23880 do not bother checking again, and always return true even if
23881 it turns out that they are not being used. Once we have made
23882 the decision that far jumps are present (and that hence the link
23883 register will be pushed onto the stack) we cannot go back on it. */
23884 if (cfun->machine->far_jump_used)
23885 return 1;
23887 /* If this function is not being called from the prologue/epilogue
23888 generation code then it must be being called from the
23889 INITIAL_ELIMINATION_OFFSET macro. */
23890 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23892 /* In this case we know that we are being asked about the elimination
23893 of the arg pointer register. If that register is not being used,
23894 then there are no arguments on the stack, and we do not have to
23895 worry that a far jump might force the prologue to push the link
23896 register, changing the stack offsets. In this case we can just
23897 return false, since the presence of far jumps in the function will
23898 not affect stack offsets.
23900 If the arg pointer is live (or if it was live, but has now been
23901 eliminated and so set to dead) then we do have to test to see if
23902 the function might contain a far jump. This test can lead to some
23903 false negatives, since before reload is completed, the length of
23904 branch instructions is not known, so gcc defaults to returning their
23905 longest length, which in turn sets the far jump attribute to true.
23907 A false negative will not result in bad code being generated, but it
23908 will result in a needless push and pop of the link register. We
23909 hope that this does not occur too often.
23911 If we need doubleword stack alignment this could affect the other
23912 elimination offsets so we can't risk getting it wrong. */
23913 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23914 cfun->machine->arg_pointer_live = 1;
23915 else if (!cfun->machine->arg_pointer_live)
23916 return 0;
23919 /* We should not change far_jump_used during or after reload, as there is
23920 no chance to change stack frame layout. */
23921 if (reload_in_progress || reload_completed)
23922 return 0;
23924 /* Check to see if the function contains a branch
23925 insn with the far jump attribute set. */
23926 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23928 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23930 far_jump = true;
23932 func_size += get_attr_length (insn);
23935 /* The far_jump attribute will always be true for thumb1 before the
23936 shorten_branch pass, so checking the far_jump attribute before
23937 shorten_branch is of little use.
23939 The following heuristic tries to estimate more accurately whether a far
23940 jump may finally be used. The heuristic is very conservative, as there is
23941 no chance to roll back a decision not to use far jumps.
23943 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23944 2-byte insn is associated with a 4 byte constant pool. Using
23945 function size 2048/3 as the threshold is conservative enough. */
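/* As a rough worked example of the threshold: a function whose insns
   total 683 bytes is already treated as possibly needing far jumps, since
   in the worst case each 2-byte insn drags in a 4-byte constant-pool
   entry, giving 683 * 3 = 2049 bytes of code plus pool, just beyond the
   2046-byte forward branch range.  */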
23946 if (far_jump)
23948 if ((func_size * 3) >= 2048)
23950 /* Record the fact that we have decided that
23951 the function does use far jumps. */
23952 cfun->machine->far_jump_used = 1;
23953 return 1;
23957 return 0;
23960 /* Return nonzero if FUNC must be entered in ARM mode. */
23962 is_called_in_ARM_mode (tree func)
23964 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23966 /* Ignore the problem about functions whose address is taken. */
23967 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23968 return TRUE;
23970 #ifdef ARM_PE
23971 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23972 #else
23973 return FALSE;
23974 #endif
23977 /* Given the stack offsets and register mask in OFFSETS, decide how
23978 many additional registers to push instead of subtracting a constant
23979 from SP. For epilogues the principle is the same except we use pop.
23980 FOR_PROLOGUE indicates which we're generating. */
23981 static int
23982 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23984 HOST_WIDE_INT amount;
23985 unsigned long live_regs_mask = offsets->saved_regs_mask;
23986 /* Extract a mask of the ones we can give to the Thumb's push/pop
23987 instruction. */
23988 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23989 /* Then count how many other high registers will need to be pushed. */
23990 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23991 int n_free, reg_base, size;
23993 if (!for_prologue && frame_pointer_needed)
23994 amount = offsets->locals_base - offsets->saved_regs;
23995 else
23996 amount = offsets->outgoing_args - offsets->saved_regs;
23998 /* If the stack frame size is 512 exactly, we can save one load
23999 instruction, which should make this a win even when optimizing
24000 for speed. */
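/* (A Thumb-1 "sub sp, #imm" only reaches multiples of 4 up to 508, so an
   adjustment of exactly 512 would otherwise need an extra instruction;
   pushing one more register brings the remainder down to 508, which fits
   in a single sub.)  */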
24001 if (!optimize_size && amount != 512)
24002 return 0;
24004 /* Can't do this if there are high registers to push. */
24005 if (high_regs_pushed != 0)
24006 return 0;
24008 /* Shouldn't do it in the prologue if no registers would normally
24009 be pushed at all. In the epilogue, also allow it if we'll have
24010 a pop insn for the PC. */
24011 if (l_mask == 0
24012 && (for_prologue
24013 || TARGET_BACKTRACE
24014 || (live_regs_mask & 1 << LR_REGNUM) == 0
24015 || TARGET_INTERWORK
24016 || crtl->args.pretend_args_size != 0))
24017 return 0;
24019 /* Don't do this if thumb_expand_prologue wants to emit instructions
24020 between the push and the stack frame allocation. */
24021 if (for_prologue
24022 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24023 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24024 return 0;
24026 reg_base = 0;
24027 n_free = 0;
24028 if (!for_prologue)
24030 size = arm_size_return_regs ();
24031 reg_base = ARM_NUM_INTS (size);
24032 live_regs_mask >>= reg_base;
24035 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24036 && (for_prologue || call_used_regs[reg_base + n_free]))
24038 live_regs_mask >>= 1;
24039 n_free++;
24042 if (n_free == 0)
24043 return 0;
24044 gcc_assert (amount / 4 * 4 == amount);
24046 if (amount >= 512 && (amount - n_free * 4) < 512)
24047 return (amount - 508) / 4;
24048 if (amount <= n_free * 4)
24049 return amount / 4;
24050 return 0;
24053 /* The bits which aren't usefully expanded as rtl. */
24054 const char *
24055 thumb1_unexpanded_epilogue (void)
24057 arm_stack_offsets *offsets;
24058 int regno;
24059 unsigned long live_regs_mask = 0;
24060 int high_regs_pushed = 0;
24061 int extra_pop;
24062 int had_to_push_lr;
24063 int size;
24065 if (cfun->machine->return_used_this_function != 0)
24066 return "";
24068 if (IS_NAKED (arm_current_func_type ()))
24069 return "";
24071 offsets = arm_get_frame_offsets ();
24072 live_regs_mask = offsets->saved_regs_mask;
24073 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24075 /* We can deduce the registers used from the function's return value.
24076 This is more reliable than examining df_regs_ever_live_p () because that
24077 will be set if the register is ever used in the function, not just if
24078 the register is used to hold a return value. */
24079 size = arm_size_return_regs ();
24081 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24082 if (extra_pop > 0)
24084 unsigned long extra_mask = (1 << extra_pop) - 1;
24085 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24088 /* The prolog may have pushed some high registers to use as
24089 work registers. e.g. the testsuite file:
24090 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24091 compiles to produce:
24092 push {r4, r5, r6, r7, lr}
24093 mov r7, r9
24094 mov r6, r8
24095 push {r6, r7}
24096 as part of the prolog. We have to undo that pushing here. */
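/* The matching epilogue sequence emitted below is then roughly:
   pop {r2, r3}
   mov r8, r2
   mov r9, r3
   where the exact low registers used depend on the size of the return
   value.  */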
24098 if (high_regs_pushed)
24100 unsigned long mask = live_regs_mask & 0xff;
24101 int next_hi_reg;
24103 /* The available low registers depend on the size of the value we are
24104 returning. */
24105 if (size <= 12)
24106 mask |= 1 << 3;
24107 if (size <= 8)
24108 mask |= 1 << 2;
24110 if (mask == 0)
24111 /* Oh dear! We have no low registers into which we can pop
24112 high registers! */
24113 internal_error
24114 ("no low registers available for popping high registers");
24116 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24117 if (live_regs_mask & (1 << next_hi_reg))
24118 break;
24120 while (high_regs_pushed)
24122 /* Find lo register(s) into which the high register(s) can
24123 be popped. */
24124 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24126 if (mask & (1 << regno))
24127 high_regs_pushed--;
24128 if (high_regs_pushed == 0)
24129 break;
24132 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24134 /* Pop the values into the low register(s). */
24135 thumb_pop (asm_out_file, mask);
24137 /* Move the value(s) into the high registers. */
24138 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24140 if (mask & (1 << regno))
24142 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24143 regno);
24145 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24146 if (live_regs_mask & (1 << next_hi_reg))
24147 break;
24151 live_regs_mask &= ~0x0f00;
24154 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24155 live_regs_mask &= 0xff;
24157 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24159 /* Pop the return address into the PC. */
24160 if (had_to_push_lr)
24161 live_regs_mask |= 1 << PC_REGNUM;
24163 /* Either no argument registers were pushed or a backtrace
24164 structure was created which includes an adjusted stack
24165 pointer, so just pop everything. */
24166 if (live_regs_mask)
24167 thumb_pop (asm_out_file, live_regs_mask);
24169 /* We have either just popped the return address into the
24170 PC or it was kept in LR for the entire function.
24171 Note that thumb_pop has already called thumb_exit if the
24172 PC was in the list. */
24173 if (!had_to_push_lr)
24174 thumb_exit (asm_out_file, LR_REGNUM);
24176 else
24178 /* Pop everything but the return address. */
24179 if (live_regs_mask)
24180 thumb_pop (asm_out_file, live_regs_mask);
24182 if (had_to_push_lr)
24184 if (size > 12)
24186 /* We have no free low regs, so save one. */
24187 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24188 LAST_ARG_REGNUM);
24191 /* Get the return address into a temporary register. */
24192 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24194 if (size > 12)
24196 /* Move the return address to lr. */
24197 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24198 LAST_ARG_REGNUM);
24199 /* Restore the low register. */
24200 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24201 IP_REGNUM);
24202 regno = LR_REGNUM;
24204 else
24205 regno = LAST_ARG_REGNUM;
24207 else
24208 regno = LR_REGNUM;
24210 /* Remove the argument registers that were pushed onto the stack. */
24211 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24212 SP_REGNUM, SP_REGNUM,
24213 crtl->args.pretend_args_size);
24215 thumb_exit (asm_out_file, regno);
24218 return "";
24221 /* Functions to save and restore machine-specific function data. */
24222 static struct machine_function *
24223 arm_init_machine_status (void)
24225 struct machine_function *machine;
24226 machine = ggc_cleared_alloc<machine_function> ();
24228 #if ARM_FT_UNKNOWN != 0
24229 machine->func_type = ARM_FT_UNKNOWN;
24230 #endif
24231 return machine;
24234 /* Return an RTX indicating where the return address to the
24235 calling function can be found. */
24237 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24239 if (count != 0)
24240 return NULL_RTX;
24242 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24245 /* Do anything needed before RTL is emitted for each function. */
24246 void
24247 arm_init_expanders (void)
24249 /* Arrange to initialize and mark the machine per-function status. */
24250 init_machine_status = arm_init_machine_status;
24252 /* This is to stop the combine pass optimizing away the alignment
24253 adjustment of va_arg. */
24254 /* ??? It is claimed that this should not be necessary. */
24255 if (cfun)
24256 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24260 /* Like arm_compute_initial_elimination offset. Simpler because there
24261 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24262 to point at the base of the local variables after static stack
24263 space for a function has been allocated. */
24265 HOST_WIDE_INT
24266 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24268 arm_stack_offsets *offsets;
24270 offsets = arm_get_frame_offsets ();
24272 switch (from)
24274 case ARG_POINTER_REGNUM:
24275 switch (to)
24277 case STACK_POINTER_REGNUM:
24278 return offsets->outgoing_args - offsets->saved_args;
24280 case FRAME_POINTER_REGNUM:
24281 return offsets->soft_frame - offsets->saved_args;
24283 case ARM_HARD_FRAME_POINTER_REGNUM:
24284 return offsets->saved_regs - offsets->saved_args;
24286 case THUMB_HARD_FRAME_POINTER_REGNUM:
24287 return offsets->locals_base - offsets->saved_args;
24289 default:
24290 gcc_unreachable ();
24292 break;
24294 case FRAME_POINTER_REGNUM:
24295 switch (to)
24297 case STACK_POINTER_REGNUM:
24298 return offsets->outgoing_args - offsets->soft_frame;
24300 case ARM_HARD_FRAME_POINTER_REGNUM:
24301 return offsets->saved_regs - offsets->soft_frame;
24303 case THUMB_HARD_FRAME_POINTER_REGNUM:
24304 return offsets->locals_base - offsets->soft_frame;
24306 default:
24307 gcc_unreachable ();
24309 break;
24311 default:
24312 gcc_unreachable ();
24316 /* Generate the function's prologue. */
24318 void
24319 thumb1_expand_prologue (void)
24321 rtx_insn *insn;
24323 HOST_WIDE_INT amount;
24324 arm_stack_offsets *offsets;
24325 unsigned long func_type;
24326 int regno;
24327 unsigned long live_regs_mask;
24328 unsigned long l_mask;
24329 unsigned high_regs_pushed = 0;
24331 func_type = arm_current_func_type ();
24333 /* Naked functions don't have prologues. */
24334 if (IS_NAKED (func_type))
24335 return;
24337 if (IS_INTERRUPT (func_type))
24339 error ("interrupt Service Routines cannot be coded in Thumb mode");
24340 return;
24343 if (is_called_in_ARM_mode (current_function_decl))
24344 emit_insn (gen_prologue_thumb1_interwork ());
24346 offsets = arm_get_frame_offsets ();
24347 live_regs_mask = offsets->saved_regs_mask;
24349 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24350 l_mask = live_regs_mask & 0x40ff;
24351 /* Then count how many other high registers will need to be pushed. */
24352 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24354 if (crtl->args.pretend_args_size)
24356 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24358 if (cfun->machine->uses_anonymous_args)
24360 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24361 unsigned long mask;
24363 mask = 1ul << (LAST_ARG_REGNUM + 1);
24364 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24366 insn = thumb1_emit_multi_reg_push (mask, 0);
24368 else
24370 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24371 stack_pointer_rtx, x));
24373 RTX_FRAME_RELATED_P (insn) = 1;
24376 if (TARGET_BACKTRACE)
24378 HOST_WIDE_INT offset = 0;
24379 unsigned work_register;
24380 rtx work_reg, x, arm_hfp_rtx;
24382 /* We have been asked to create a stack backtrace structure.
24383 The code looks like this:
24385 0 .align 2
24386 0 func:
24387 0 sub SP, #16 Reserve space for 4 registers.
24388 2 push {R7} Push low registers.
24389 4 add R7, SP, #20 Get the stack pointer before the push.
24390 6 str R7, [SP, #8] Store the stack pointer
24391 (before reserving the space).
24392 8 mov R7, PC Get hold of the start of this code + 12.
24393 10 str R7, [SP, #16] Store it.
24394 12 mov R7, FP Get hold of the current frame pointer.
24395 14 str R7, [SP, #4] Store it.
24396 16 mov R7, LR Get hold of the current return address.
24397 18 str R7, [SP, #12] Store it.
24398 20 add R7, SP, #16 Point at the start of the
24399 backtrace structure.
24400 22 mov FP, R7 Put this value into the frame pointer. */
24402 work_register = thumb_find_work_register (live_regs_mask);
24403 work_reg = gen_rtx_REG (SImode, work_register);
24404 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24406 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24407 stack_pointer_rtx, GEN_INT (-16)));
24408 RTX_FRAME_RELATED_P (insn) = 1;
24410 if (l_mask)
24412 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24413 RTX_FRAME_RELATED_P (insn) = 1;
24415 offset = bit_count (l_mask) * UNITS_PER_WORD;
24418 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24419 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24421 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24422 x = gen_frame_mem (SImode, x);
24423 emit_move_insn (x, work_reg);
24425 /* Make sure that the instruction fetching the PC is in the right place
24426 to calculate "start of backtrace creation code + 12". */
24427 /* ??? The stores using the common WORK_REG ought to be enough to
24428 prevent the scheduler from doing anything weird. Failing that
24429 we could always move all of the following into an UNSPEC_VOLATILE. */
24430 if (l_mask)
24432 x = gen_rtx_REG (SImode, PC_REGNUM);
24433 emit_move_insn (work_reg, x);
24435 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24436 x = gen_frame_mem (SImode, x);
24437 emit_move_insn (x, work_reg);
24439 emit_move_insn (work_reg, arm_hfp_rtx);
24441 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24442 x = gen_frame_mem (SImode, x);
24443 emit_move_insn (x, work_reg);
24445 else
24447 emit_move_insn (work_reg, arm_hfp_rtx);
24449 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24450 x = gen_frame_mem (SImode, x);
24451 emit_move_insn (x, work_reg);
24453 x = gen_rtx_REG (SImode, PC_REGNUM);
24454 emit_move_insn (work_reg, x);
24456 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24457 x = gen_frame_mem (SImode, x);
24458 emit_move_insn (x, work_reg);
24461 x = gen_rtx_REG (SImode, LR_REGNUM);
24462 emit_move_insn (work_reg, x);
24464 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24465 x = gen_frame_mem (SImode, x);
24466 emit_move_insn (x, work_reg);
24468 x = GEN_INT (offset + 12);
24469 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24471 emit_move_insn (arm_hfp_rtx, work_reg);
24473 /* Optimization: If we are not pushing any low registers but we are going
24474 to push some high registers then delay our first push. This will just
24475 be a push of LR and we can combine it with the push of the first high
24476 register. */
24477 else if ((l_mask & 0xff) != 0
24478 || (high_regs_pushed == 0 && l_mask))
24480 unsigned long mask = l_mask;
24481 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24482 insn = thumb1_emit_multi_reg_push (mask, mask);
24483 RTX_FRAME_RELATED_P (insn) = 1;
24486 if (high_regs_pushed)
24488 unsigned pushable_regs;
24489 unsigned next_hi_reg;
24490 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24491 : crtl->args.info.nregs;
24492 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24494 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24495 if (live_regs_mask & (1 << next_hi_reg))
24496 break;
24498 /* Here we need to mask out registers used for passing arguments,
24499 even if they could otherwise be pushed. This avoids using them to stash
24500 the high registers, since such a stash could clobber the argument values. */
24501 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24503 if (pushable_regs == 0)
24504 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24506 while (high_regs_pushed > 0)
24508 unsigned long real_regs_mask = 0;
24510 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24512 if (pushable_regs & (1 << regno))
24514 emit_move_insn (gen_rtx_REG (SImode, regno),
24515 gen_rtx_REG (SImode, next_hi_reg));
24517 high_regs_pushed --;
24518 real_regs_mask |= (1 << next_hi_reg);
24520 if (high_regs_pushed)
24522 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24523 next_hi_reg --)
24524 if (live_regs_mask & (1 << next_hi_reg))
24525 break;
24527 else
24529 pushable_regs &= ~((1 << regno) - 1);
24530 break;
24535 /* If we had to find a work register and we have not yet
24536 saved the LR then add it to the list of regs to push. */
24537 if (l_mask == (1 << LR_REGNUM))
24539 pushable_regs |= l_mask;
24540 real_regs_mask |= l_mask;
24541 l_mask = 0;
24544 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24545 RTX_FRAME_RELATED_P (insn) = 1;
24549 /* Load the pic register before setting the frame pointer,
24550 so we can use r7 as a temporary work register. */
24551 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24552 arm_load_pic_register (live_regs_mask);
24554 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24555 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24556 stack_pointer_rtx);
24558 if (flag_stack_usage_info)
24559 current_function_static_stack_size
24560 = offsets->outgoing_args - offsets->saved_args;
24562 amount = offsets->outgoing_args - offsets->saved_regs;
24563 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24564 if (amount)
24566 if (amount < 512)
24568 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24569 GEN_INT (- amount)));
24570 RTX_FRAME_RELATED_P (insn) = 1;
24572 else
24574 rtx reg, dwarf;
24576 /* The stack decrement is too big for an immediate value in a single
24577 insn. In theory we could issue multiple subtracts, but after
24578 three of them it becomes more space efficient to place the full
24579 value in the constant pool and load into a register. (Also the
24580 ARM debugger really likes to see only one stack decrement per
24581 function). So instead we look for a scratch register into which
24582 we can load the decrement, and then we subtract this from the
24583 stack pointer. Unfortunately on the thumb the only available
24584 scratch registers are the argument registers, and we cannot use
24585 these as they may hold arguments to the function. Instead we
24586 attempt to locate a call preserved register which is used by this
24587 function. If we can find one, then we know that it will have
24588 been pushed at the start of the prologue and so we can corrupt
24589 it now. */
24590 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24591 if (live_regs_mask & (1 << regno))
24592 break;
24594 gcc_assert(regno <= LAST_LO_REGNUM);
24596 reg = gen_rtx_REG (SImode, regno);
24598 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24600 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24601 stack_pointer_rtx, reg));
24603 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24604 plus_constant (Pmode, stack_pointer_rtx,
24605 -amount));
24606 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24607 RTX_FRAME_RELATED_P (insn) = 1;
24611 if (frame_pointer_needed)
24612 thumb_set_frame_pointer (offsets);
24614 /* If we are profiling, make sure no instructions are scheduled before
24615 the call to mcount. Similarly if the user has requested no
24616 scheduling in the prolog. Similarly if we want non-call exceptions
24617 using the EABI unwinder, to prevent faulting instructions from being
24618 swapped with a stack adjustment. */
24619 if (crtl->profile || !TARGET_SCHED_PROLOG
24620 || (arm_except_unwind_info (&global_options) == UI_TARGET
24621 && cfun->can_throw_non_call_exceptions))
24622 emit_insn (gen_blockage ());
24624 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24625 if (live_regs_mask & 0xff)
24626 cfun->machine->lr_save_eliminated = 0;
24629 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24630 POP instruction can be generated. LR should be replaced by PC. All
24631 the checks required are already done by USE_RETURN_INSN (). Hence,
24632 all we really need to check here is whether a single register or
24633 multiple registers need to be popped on return. */
24634 void
24635 thumb2_expand_return (bool simple_return)
24637 int i, num_regs;
24638 unsigned long saved_regs_mask;
24639 arm_stack_offsets *offsets;
24641 offsets = arm_get_frame_offsets ();
24642 saved_regs_mask = offsets->saved_regs_mask;
24644 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24645 if (saved_regs_mask & (1 << i))
24646 num_regs++;
24648 if (!simple_return && saved_regs_mask)
24650 if (num_regs == 1)
24652 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24653 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24654 rtx addr = gen_rtx_MEM (SImode,
24655 gen_rtx_POST_INC (SImode,
24656 stack_pointer_rtx));
24657 set_mem_alias_set (addr, get_frame_alias_set ());
24658 XVECEXP (par, 0, 0) = ret_rtx;
24659 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24660 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24661 emit_jump_insn (par);
24663 else
24665 saved_regs_mask &= ~ (1 << LR_REGNUM);
24666 saved_regs_mask |= (1 << PC_REGNUM);
24667 arm_emit_multi_reg_pop (saved_regs_mask);
24670 else
24672 emit_jump_insn (simple_return_rtx);
24676 void
24677 thumb1_expand_epilogue (void)
24679 HOST_WIDE_INT amount;
24680 arm_stack_offsets *offsets;
24681 int regno;
24683 /* Naked functions don't have epilogues. */
24684 if (IS_NAKED (arm_current_func_type ()))
24685 return;
24687 offsets = arm_get_frame_offsets ();
24688 amount = offsets->outgoing_args - offsets->saved_regs;
24690 if (frame_pointer_needed)
24692 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24693 amount = offsets->locals_base - offsets->saved_regs;
24695 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24697 gcc_assert (amount >= 0);
24698 if (amount)
24700 emit_insn (gen_blockage ());
24702 if (amount < 512)
24703 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24704 GEN_INT (amount)));
24705 else
24707 /* r3 is always free in the epilogue. */
24708 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24710 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24711 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24715 /* Emit a USE (stack_pointer_rtx), so that
24716 the stack adjustment will not be deleted. */
24717 emit_insn (gen_force_register_use (stack_pointer_rtx));
24719 if (crtl->profile || !TARGET_SCHED_PROLOG)
24720 emit_insn (gen_blockage ());
24722 /* Emit a clobber for each insn that will be restored in the epilogue,
24723 so that flow2 will get register lifetimes correct. */
24724 for (regno = 0; regno < 13; regno++)
24725 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24726 emit_clobber (gen_rtx_REG (SImode, regno));
24728 if (! df_regs_ever_live_p (LR_REGNUM))
24729 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24732 /* Epilogue code for APCS frame. */
24733 static void
24734 arm_expand_epilogue_apcs_frame (bool really_return)
24736 unsigned long func_type;
24737 unsigned long saved_regs_mask;
24738 int num_regs = 0;
24739 int i;
24740 int floats_from_frame = 0;
24741 arm_stack_offsets *offsets;
24743 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24744 func_type = arm_current_func_type ();
24746 /* Get frame offsets for ARM. */
24747 offsets = arm_get_frame_offsets ();
24748 saved_regs_mask = offsets->saved_regs_mask;
24750 /* Find the offset of the floating-point save area in the frame. */
24751 floats_from_frame
24752 = (offsets->saved_args
24753 + arm_compute_static_chain_stack_bytes ()
24754 - offsets->frame);
24756 /* Compute how many core registers saved and how far away the floats are. */
24757 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24758 if (saved_regs_mask & (1 << i))
24760 num_regs++;
24761 floats_from_frame += 4;
24764 if (TARGET_HARD_FLOAT && TARGET_VFP)
24766 int start_reg;
24767 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24769 /* The offset is from IP_REGNUM. */
24770 int saved_size = arm_get_vfp_saved_size ();
24771 if (saved_size > 0)
24773 rtx_insn *insn;
24774 floats_from_frame += saved_size;
24775 insn = emit_insn (gen_addsi3 (ip_rtx,
24776 hard_frame_pointer_rtx,
24777 GEN_INT (-floats_from_frame)));
24778 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24779 ip_rtx, hard_frame_pointer_rtx);
24782 /* Generate VFP register multi-pop. */
24783 start_reg = FIRST_VFP_REGNUM;
24785 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24786 /* Look for a case where a reg does not need restoring. */
24787 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24788 && (!df_regs_ever_live_p (i + 1)
24789 || call_used_regs[i + 1]))
24791 if (start_reg != i)
24792 arm_emit_vfp_multi_reg_pop (start_reg,
24793 (i - start_reg) / 2,
24794 gen_rtx_REG (SImode,
24795 IP_REGNUM));
24796 start_reg = i + 2;
24799 /* Restore the remaining regs that we have discovered (or possibly
24800 even all of them, if the conditional in the for loop never
24801 fired). */
24802 if (start_reg != i)
24803 arm_emit_vfp_multi_reg_pop (start_reg,
24804 (i - start_reg) / 2,
24805 gen_rtx_REG (SImode, IP_REGNUM));
24808 if (TARGET_IWMMXT)
24810 /* The frame pointer is guaranteed to be non-double-word aligned, as
24811 it is set to double-word-aligned old_stack_pointer - 4. */
24812 rtx_insn *insn;
24813 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24815 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24816 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24818 rtx addr = gen_frame_mem (V2SImode,
24819 plus_constant (Pmode, hard_frame_pointer_rtx,
24820 - lrm_count * 4));
24821 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24822 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24823 gen_rtx_REG (V2SImode, i),
24824 NULL_RTX);
24825 lrm_count += 2;
24829 /* saved_regs_mask should contain IP, which holds the old stack pointer
24830 from the time the activation record was created. Since SP and IP are
24831 adjacent registers, we can restore the value directly into SP. */
24832 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24833 saved_regs_mask &= ~(1 << IP_REGNUM);
24834 saved_regs_mask |= (1 << SP_REGNUM);
24836 /* There are two registers left in saved_regs_mask - LR and PC. We
24837 only need to restore LR (the return address), but to
24838 save time we can load it directly into PC, unless we need a
24839 special function exit sequence, or we are not really returning. */
24840 if (really_return
24841 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24842 && !crtl->calls_eh_return)
24843 /* Delete LR from the register mask, so that LR on
24844 the stack is loaded into the PC in the register mask. */
24845 saved_regs_mask &= ~(1 << LR_REGNUM);
24846 else
24847 saved_regs_mask &= ~(1 << PC_REGNUM);
24849 num_regs = bit_count (saved_regs_mask);
24850 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24852 rtx_insn *insn;
24853 emit_insn (gen_blockage ());
24854 /* Unwind the stack to just below the saved registers. */
24855 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24856 hard_frame_pointer_rtx,
24857 GEN_INT (- 4 * num_regs)));
24859 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24860 stack_pointer_rtx, hard_frame_pointer_rtx);
24863 arm_emit_multi_reg_pop (saved_regs_mask);
24865 if (IS_INTERRUPT (func_type))
24867 /* Interrupt handlers will have pushed the
24868 IP onto the stack, so restore it now. */
24869 rtx_insn *insn;
24870 rtx addr = gen_rtx_MEM (SImode,
24871 gen_rtx_POST_INC (SImode,
24872 stack_pointer_rtx));
24873 set_mem_alias_set (addr, get_frame_alias_set ());
24874 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24875 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24876 gen_rtx_REG (SImode, IP_REGNUM),
24877 NULL_RTX);
24880 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24881 return;
24883 if (crtl->calls_eh_return)
24884 emit_insn (gen_addsi3 (stack_pointer_rtx,
24885 stack_pointer_rtx,
24886 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24888 if (IS_STACKALIGN (func_type))
24889 /* Restore the original stack pointer. Before prologue, the stack was
24890 realigned and the original stack pointer saved in r0. For details,
24891 see comment in arm_expand_prologue. */
24892 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24894 emit_jump_insn (simple_return_rtx);
24897 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24898 function is not a sibcall. */
24899 void
24900 arm_expand_epilogue (bool really_return)
24902 unsigned long func_type;
24903 unsigned long saved_regs_mask;
24904 int num_regs = 0;
24905 int i;
24906 int amount;
24907 arm_stack_offsets *offsets;
24909 func_type = arm_current_func_type ();
24911 /* Naked functions don't have epilogues. Hence, generate the return pattern and
24912 let output_return_instruction take care of any instruction emission. */
24913 if (IS_NAKED (func_type)
24914 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24916 if (really_return)
24917 emit_jump_insn (simple_return_rtx);
24918 return;
24921 /* If we are throwing an exception, then we really must be doing a
24922 return, so we can't tail-call. */
24923 gcc_assert (!crtl->calls_eh_return || really_return);
24925 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24927 arm_expand_epilogue_apcs_frame (really_return);
24928 return;
24931 /* Get frame offsets for ARM. */
24932 offsets = arm_get_frame_offsets ();
24933 saved_regs_mask = offsets->saved_regs_mask;
24934 num_regs = bit_count (saved_regs_mask);
24936 if (frame_pointer_needed)
24938 rtx_insn *insn;
24939 /* Restore stack pointer if necessary. */
24940 if (TARGET_ARM)
24942 /* In ARM mode, frame pointer points to first saved register.
24943 Restore stack pointer to last saved register. */
24944 amount = offsets->frame - offsets->saved_regs;
24946 /* Force out any pending memory operations that reference stacked data
24947 before stack de-allocation occurs. */
24948 emit_insn (gen_blockage ());
24949 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24950 hard_frame_pointer_rtx,
24951 GEN_INT (amount)));
24952 arm_add_cfa_adjust_cfa_note (insn, amount,
24953 stack_pointer_rtx,
24954 hard_frame_pointer_rtx);
24956 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24957 deleted. */
24958 emit_insn (gen_force_register_use (stack_pointer_rtx));
24960 else
24962 /* In Thumb-2 mode, the frame pointer points to the last saved
24963 register. */
24964 amount = offsets->locals_base - offsets->saved_regs;
24965 if (amount)
24967 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24968 hard_frame_pointer_rtx,
24969 GEN_INT (amount)));
24970 arm_add_cfa_adjust_cfa_note (insn, amount,
24971 hard_frame_pointer_rtx,
24972 hard_frame_pointer_rtx);
24975 /* Force out any pending memory operations that reference stacked data
24976 before stack de-allocation occurs. */
24977 emit_insn (gen_blockage ());
24978 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24979 hard_frame_pointer_rtx));
24980 arm_add_cfa_adjust_cfa_note (insn, 0,
24981 stack_pointer_rtx,
24982 hard_frame_pointer_rtx);
24983 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24984 deleted. */
24985 emit_insn (gen_force_register_use (stack_pointer_rtx));
24988 else
24990 /* Pop off outgoing args and local frame to adjust stack pointer to
24991 last saved register. */
24992 amount = offsets->outgoing_args - offsets->saved_regs;
24993 if (amount)
24995 rtx_insn *tmp;
24996 /* Force out any pending memory operations that reference stacked data
24997 before stack de-allocation occurs. */
24998 emit_insn (gen_blockage ());
24999 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25000 stack_pointer_rtx,
25001 GEN_INT (amount)));
25002 arm_add_cfa_adjust_cfa_note (tmp, amount,
25003 stack_pointer_rtx, stack_pointer_rtx);
25004 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25005 not deleted. */
25006 emit_insn (gen_force_register_use (stack_pointer_rtx));
25010 if (TARGET_HARD_FLOAT && TARGET_VFP)
25012 /* Generate VFP register multi-pop. */
25013 int end_reg = LAST_VFP_REGNUM + 1;
25015 /* Scan the registers in reverse order. We need to match
25016 any groupings made in the prologue and generate matching
25017 vldm operations. The need to match groups is because,
25018 unlike pop, vldm can only do consecutive regs. */
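/* For instance, if the prologue saved d8-d9 and d12-d13 as two separate
   groups, two separate vldm pops are emitted here, one per contiguous
   group, rather than a single pop spanning the gap.  */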
25019 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25020 /* Look for a case where a reg does not need restoring. */
25021 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25022 && (!df_regs_ever_live_p (i + 1)
25023 || call_used_regs[i + 1]))
25025 /* Restore the regs discovered so far (from reg+2 to
25026 end_reg). */
25027 if (end_reg > i + 2)
25028 arm_emit_vfp_multi_reg_pop (i + 2,
25029 (end_reg - (i + 2)) / 2,
25030 stack_pointer_rtx);
25031 end_reg = i;
25034 /* Restore the remaining regs that we have discovered (or possibly
25035 even all of them, if the conditional in the for loop never
25036 fired). */
25037 if (end_reg > i + 2)
25038 arm_emit_vfp_multi_reg_pop (i + 2,
25039 (end_reg - (i + 2)) / 2,
25040 stack_pointer_rtx);
25043 if (TARGET_IWMMXT)
25044 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25045 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25047 rtx_insn *insn;
25048 rtx addr = gen_rtx_MEM (V2SImode,
25049 gen_rtx_POST_INC (SImode,
25050 stack_pointer_rtx));
25051 set_mem_alias_set (addr, get_frame_alias_set ());
25052 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25053 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25054 gen_rtx_REG (V2SImode, i),
25055 NULL_RTX);
25056 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25057 stack_pointer_rtx, stack_pointer_rtx);
25060 if (saved_regs_mask)
25062 rtx insn;
25063 bool return_in_pc = false;
25065 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25066 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25067 && !IS_STACKALIGN (func_type)
25068 && really_return
25069 && crtl->args.pretend_args_size == 0
25070 && saved_regs_mask & (1 << LR_REGNUM)
25071 && !crtl->calls_eh_return)
25073 saved_regs_mask &= ~(1 << LR_REGNUM);
25074 saved_regs_mask |= (1 << PC_REGNUM);
25075 return_in_pc = true;
25078 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25080 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25081 if (saved_regs_mask & (1 << i))
25083 rtx addr = gen_rtx_MEM (SImode,
25084 gen_rtx_POST_INC (SImode,
25085 stack_pointer_rtx));
25086 set_mem_alias_set (addr, get_frame_alias_set ());
25088 if (i == PC_REGNUM)
25090 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25091 XVECEXP (insn, 0, 0) = ret_rtx;
25092 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25093 gen_rtx_REG (SImode, i),
25094 addr);
25095 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25096 insn = emit_jump_insn (insn);
25098 else
25100 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25101 addr));
25102 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25103 gen_rtx_REG (SImode, i),
25104 NULL_RTX);
25105 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25106 stack_pointer_rtx,
25107 stack_pointer_rtx);
25111 else
25113 if (TARGET_LDRD
25114 && current_tune->prefer_ldrd_strd
25115 && !optimize_function_for_size_p (cfun))
25117 if (TARGET_THUMB2)
25118 thumb2_emit_ldrd_pop (saved_regs_mask);
25119 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25120 arm_emit_ldrd_pop (saved_regs_mask);
25121 else
25122 arm_emit_multi_reg_pop (saved_regs_mask);
25124 else
25125 arm_emit_multi_reg_pop (saved_regs_mask);
25128 if (return_in_pc == true)
25129 return;
25132 if (crtl->args.pretend_args_size)
25134 int i, j;
25135 rtx dwarf = NULL_RTX;
25136 rtx_insn *tmp =
25137 emit_insn (gen_addsi3 (stack_pointer_rtx,
25138 stack_pointer_rtx,
25139 GEN_INT (crtl->args.pretend_args_size)));
25141 RTX_FRAME_RELATED_P (tmp) = 1;
25143 if (cfun->machine->uses_anonymous_args)
25145 /* Restore pretend args. Refer to arm_expand_prologue for how
25146 pretend_args are saved on the stack. */
25147 int num_regs = crtl->args.pretend_args_size / 4;
25148 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25149 for (j = 0, i = 0; j < num_regs; i++)
25150 if (saved_regs_mask & (1 << i))
25152 rtx reg = gen_rtx_REG (SImode, i);
25153 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25154 j++;
25156 REG_NOTES (tmp) = dwarf;
25158 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25159 stack_pointer_rtx, stack_pointer_rtx);
25162 if (!really_return)
25163 return;
25165 if (crtl->calls_eh_return)
25166 emit_insn (gen_addsi3 (stack_pointer_rtx,
25167 stack_pointer_rtx,
25168 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25170 if (IS_STACKALIGN (func_type))
25171 /* Restore the original stack pointer. Before prologue, the stack was
25172 realigned and the original stack pointer saved in r0. For details,
25173 see comment in arm_expand_prologue. */
25174 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25176 emit_jump_insn (simple_return_rtx);
25179 /* Implementation of insn prologue_thumb1_interwork. This is the first
25180 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25182 const char *
25183 thumb1_output_interwork (void)
25185 const char * name;
25186 FILE *f = asm_out_file;
25188 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25189 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25190 == SYMBOL_REF);
25191 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25193 /* Generate code sequence to switch us into Thumb mode. */
25194 /* The .code 32 directive has already been emitted by
25195 ASM_DECLARE_FUNCTION_NAME. */
25196 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25197 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25199 /* Generate a label, so that the debugger will notice the
25200 change in instruction sets. This label is also used by
25201 the assembler to bypass the ARM code when this function
25202 is called from a Thumb encoded function elsewhere in the
25203 same file. Hence the definition of STUB_NAME here must
25204 agree with the definition in gas/config/tc-arm.c. */
25206 #define STUB_NAME ".real_start_of"
25208 fprintf (f, "\t.code\t16\n");
25209 #ifdef ARM_PE
25210 if (arm_dllexport_name_p (name))
25211 name = arm_strip_name_encoding (name);
25212 #endif
25213 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25214 fprintf (f, "\t.thumb_func\n");
25215 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25217 return "";
25220 /* Handle the case of a double word load into a low register from
25221 a computed memory address. The computed address may involve a
25222 register which is overwritten by the load. */
25223 const char *
25224 thumb_load_double_from_address (rtx *operands)
25226 rtx addr;
25227 rtx base;
25228 rtx offset;
25229 rtx arg1;
25230 rtx arg2;
25232 gcc_assert (REG_P (operands[0]));
25233 gcc_assert (MEM_P (operands[1]));
25235 /* Get the memory address. */
25236 addr = XEXP (operands[1], 0);
25238 /* Work out how the memory address is computed. */
25239 switch (GET_CODE (addr))
25241 case REG:
25242 operands[2] = adjust_address (operands[1], SImode, 4);
25244 if (REGNO (operands[0]) == REGNO (addr))
25246 output_asm_insn ("ldr\t%H0, %2", operands);
25247 output_asm_insn ("ldr\t%0, %1", operands);
25249 else
25251 output_asm_insn ("ldr\t%0, %1", operands);
25252 output_asm_insn ("ldr\t%H0, %2", operands);
25254 break;
25256 case CONST:
25257 /* Compute <address> + 4 for the high order load. */
25258 operands[2] = adjust_address (operands[1], SImode, 4);
25260 output_asm_insn ("ldr\t%0, %1", operands);
25261 output_asm_insn ("ldr\t%H0, %2", operands);
25262 break;
25264 case PLUS:
25265 arg1 = XEXP (addr, 0);
25266 arg2 = XEXP (addr, 1);
25268 if (CONSTANT_P (arg1))
25269 base = arg2, offset = arg1;
25270 else
25271 base = arg1, offset = arg2;
25273 gcc_assert (REG_P (base));
25275 /* Catch the case of <address> = <reg> + <reg> */
25276 if (REG_P (offset))
25278 int reg_offset = REGNO (offset);
25279 int reg_base = REGNO (base);
25280 int reg_dest = REGNO (operands[0]);
25282 /* Add the base and offset registers together into the
25283 higher destination register. */
25284 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25285 reg_dest + 1, reg_base, reg_offset);
25287 /* Load the lower destination register from the address in
25288 the higher destination register. */
25289 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25290 reg_dest, reg_dest + 1);
25292 /* Load the higher destination register from its own address
25293 plus 4. */
25294 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25295 reg_dest + 1, reg_dest + 1);
25297 else
25299 /* Compute <address> + 4 for the high order load. */
25300 operands[2] = adjust_address (operands[1], SImode, 4);
25302 /* If the computed address is held in the low order register
25303 then load the high order register first, otherwise always
25304 load the low order register first. */
25305 if (REGNO (operands[0]) == REGNO (base))
25307 output_asm_insn ("ldr\t%H0, %2", operands);
25308 output_asm_insn ("ldr\t%0, %1", operands);
25310 else
25312 output_asm_insn ("ldr\t%0, %1", operands);
25313 output_asm_insn ("ldr\t%H0, %2", operands);
25316 break;
25318 case LABEL_REF:
25319 /* With no registers to worry about we can just load the value
25320 directly. */
25321 operands[2] = adjust_address (operands[1], SImode, 4);
25323 output_asm_insn ("ldr\t%H0, %2", operands);
25324 output_asm_insn ("ldr\t%0, %1", operands);
25325 break;
25327 default:
25328 gcc_unreachable ();
25331 return "";
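/* For example, in the REG case above, loading a doubleword into r0/r1 from
   an address held in r0 emits "ldr r1, [r0, #4]" followed by "ldr r0, [r0]":
   the high word is loaded first because the low-word load overwrites the
   base register.  With a non-overlapping destination the low word is
   loaded first.  */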
25334 const char *
25335 thumb_output_move_mem_multiple (int n, rtx *operands)
25337 rtx tmp;
25339 switch (n)
25341 case 2:
25342 if (REGNO (operands[4]) > REGNO (operands[5]))
25344 tmp = operands[4];
25345 operands[4] = operands[5];
25346 operands[5] = tmp;
25348 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25349 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25350 break;
25352 case 3:
25353 if (REGNO (operands[4]) > REGNO (operands[5]))
25355 tmp = operands[4];
25356 operands[4] = operands[5];
25357 operands[5] = tmp;
25359 if (REGNO (operands[5]) > REGNO (operands[6]))
25361 tmp = operands[5];
25362 operands[5] = operands[6];
25363 operands[6] = tmp;
25365 if (REGNO (operands[4]) > REGNO (operands[5]))
25367 tmp = operands[4];
25368 operands[4] = operands[5];
25369 operands[5] = tmp;
25372 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25373 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25374 break;
25376 default:
25377 gcc_unreachable ();
25380 return "";
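/* For example, for n == 3 the conditional swaps above sort the three
   scratch registers into ascending order, so scratch registers arriving as
   r5, r3, r4 are emitted as the register list {r3, r4, r5} in both the
   ldmia and the stmia.  */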
25383 /* Output a call-via instruction for thumb state. */
25384 const char *
25385 thumb_call_via_reg (rtx reg)
25387 int regno = REGNO (reg);
25388 rtx *labelp;
25390 gcc_assert (regno < LR_REGNUM);
25392 /* If we are in the normal text section we can use a single instance
25393 per compilation unit. If we are doing function sections, then we need
25394 an entry per section, since we can't rely on reachability. */
25395 if (in_section == text_section)
25397 thumb_call_reg_needed = 1;
25399 if (thumb_call_via_label[regno] == NULL)
25400 thumb_call_via_label[regno] = gen_label_rtx ();
25401 labelp = thumb_call_via_label + regno;
25403 else
25405 if (cfun->machine->call_via[regno] == NULL)
25406 cfun->machine->call_via[regno] = gen_label_rtx ();
25407 labelp = cfun->machine->call_via + regno;
25410 output_asm_insn ("bl\t%a0", labelp);
25411 return "";
25414 /* Routines for generating rtl. */
25415 void
25416 thumb_expand_movmemqi (rtx *operands)
25418 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25419 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25420 HOST_WIDE_INT len = INTVAL (operands[2]);
25421 HOST_WIDE_INT offset = 0;
25423 while (len >= 12)
25425 emit_insn (gen_movmem12b (out, in, out, in));
25426 len -= 12;
25429 if (len >= 8)
25431 emit_insn (gen_movmem8b (out, in, out, in));
25432 len -= 8;
25435 if (len >= 4)
25437 rtx reg = gen_reg_rtx (SImode);
25438 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25439 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25440 len -= 4;
25441 offset += 4;
25444 if (len >= 2)
25446 rtx reg = gen_reg_rtx (HImode);
25447 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25448 plus_constant (Pmode, in,
25449 offset))));
25450 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25451 offset)),
25452 reg));
25453 len -= 2;
25454 offset += 2;
25457 if (len)
25459 rtx reg = gen_reg_rtx (QImode);
25460 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25461 plus_constant (Pmode, in,
25462 offset))));
25463 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25464 offset)),
25465 reg));
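/* For example, a 23-byte copy is expanded as one 12-byte block move and one
   8-byte block move (both of which post-increment the pointer registers),
   followed by a halfword copy at offset 0 and a byte copy at offset 2.  */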
25469 void
25470 thumb_reload_out_hi (rtx *operands)
25472 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25475 /* Handle reading a half-word from memory during reload. */
25476 void
25477 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25479 gcc_unreachable ();
25482 /* Return the length of a function name prefix
25483 that starts with the character 'c'. */
25484 static int
25485 arm_get_strip_length (int c)
25487 switch (c)
25489 ARM_NAME_ENCODING_LENGTHS
25490 default: return 0;
25494 /* Return a pointer to a function's name with any
25495 and all prefix encodings stripped from it. */
25496 const char *
25497 arm_strip_name_encoding (const char *name)
25499 int skip;
25501 while ((skip = arm_get_strip_length (* name)))
25502 name += skip;
25504 return name;
25507 /* If there is a '*' anywhere in the name's prefix, then
25508 emit the stripped name verbatim, otherwise prepend an
25509 underscore if leading underscores are being used. */
25510 void
25511 arm_asm_output_labelref (FILE *stream, const char *name)
25513 int skip;
25514 int verbatim = 0;
25516 while ((skip = arm_get_strip_length (* name)))
25518 verbatim |= (*name == '*');
25519 name += skip;
25522 if (verbatim)
25523 fputs (name, stream);
25524 else
25525 asm_fprintf (stream, "%U%s", name);
25528 /* This function is used to emit an EABI tag and its associated value.
25529 We emit the numerical value of the tag in case the assembler does not
25530 support textual tags (e.g. gas prior to 2.20). If requested we include
25531 the tag name in a comment so that anyone reading the assembler output
25532 will know which tag is being set.
25534 This function is not static because arm-c.c needs it too. */
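/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) emits
   ".eabi_attribute 26, 1", followed under -fverbose-asm by the comment
   "@ Tag_ABI_enum_size" (assuming the usual ARM comment marker "@").  */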
25536 void
25537 arm_emit_eabi_attribute (const char *name, int num, int val)
25539 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25540 if (flag_verbose_asm || flag_debug_asm)
25541 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25542 asm_fprintf (asm_out_file, "\n");
25545 static void
25546 arm_file_start (void)
25548 int val;
25550 if (TARGET_UNIFIED_ASM)
25551 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25553 if (TARGET_BPABI)
25555 const char *fpu_name;
25556 if (arm_selected_arch)
25558 /* armv7ve doesn't support any extensions. */
25559 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25561 /* Keep backward compatibility for assemblers
25562 which don't support armv7ve. */
25563 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25564 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25565 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25566 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25567 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25569 else
25571 const char* pos = strchr (arm_selected_arch->name, '+');
25572 if (pos)
25574 char buf[15];
25575 gcc_assert (strlen (arm_selected_arch->name)
25576 <= sizeof (buf) / sizeof (*pos));
25577 strncpy (buf, arm_selected_arch->name,
25578 (pos - arm_selected_arch->name) * sizeof (*pos));
25579 buf[pos - arm_selected_arch->name] = '\0';
25580 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25581 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25583 else
25584 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25587 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25588 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25589 else
25591 const char* truncated_name
25592 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25593 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25596 if (TARGET_SOFT_FLOAT)
25598 fpu_name = "softvfp";
25600 else
25602 fpu_name = arm_fpu_desc->name;
25603 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25605 if (TARGET_HARD_FLOAT)
25606 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25607 if (TARGET_HARD_FLOAT_ABI)
25608 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25611 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25613 /* Some of these attributes only apply when the corresponding features
25614 are used. However we don't have any easy way of figuring this out.
25615 Conservatively record the setting that would have been used. */
25617 if (flag_rounding_math)
25618 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25620 if (!flag_unsafe_math_optimizations)
25622 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25623 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25625 if (flag_signaling_nans)
25626 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25628 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25629 flag_finite_math_only ? 1 : 3);
25631 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25632 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25633 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25634 flag_short_enums ? 1 : 2);
25636 /* Tag_ABI_optimization_goals. */
25637 if (optimize_size)
25638 val = 4;
25639 else if (optimize >= 2)
25640 val = 2;
25641 else if (optimize)
25642 val = 1;
25643 else
25644 val = 6;
25645 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25647 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25648 unaligned_access);
25650 if (arm_fp16_format)
25651 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25652 (int) arm_fp16_format);
25654 if (arm_lang_output_object_attributes_hook)
25655 arm_lang_output_object_attributes_hook();
25658 default_file_start ();
25661 static void
25662 arm_file_end (void)
25664 int regno;
25666 if (NEED_INDICATE_EXEC_STACK)
25667 /* Add .note.GNU-stack. */
25668 file_end_indicate_exec_stack ();
25670 if (! thumb_call_reg_needed)
25671 return;
25673 switch_to_section (text_section);
25674 asm_fprintf (asm_out_file, "\t.code 16\n");
25675 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25677 for (regno = 0; regno < LR_REGNUM; regno++)
25679 rtx label = thumb_call_via_label[regno];
25681 if (label != 0)
25683 targetm.asm_out.internal_label (asm_out_file, "L",
25684 CODE_LABEL_NUMBER (label));
25685 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25690 #ifndef ARM_PE
25691 /* Symbols in the text segment can be accessed without indirecting via the
25692 constant pool; it may take an extra binary operation, but this is still
25693 faster than indirecting via memory. Don't do this when not optimizing,
25694 since we won't be calculating all of the offsets necessary to do this
25695 simplification. */
25697 static void
25698 arm_encode_section_info (tree decl, rtx rtl, int first)
25700 if (optimize > 0 && TREE_CONSTANT (decl))
25701 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25703 default_encode_section_info (decl, rtl, first);
25705 #endif /* !ARM_PE */
25707 static void
25708 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25710 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25711 && !strcmp (prefix, "L"))
25713 arm_ccfsm_state = 0;
25714 arm_target_insn = NULL;
25716 default_internal_label (stream, prefix, labelno);
25719 /* Output code to add DELTA to the first argument, and then jump
25720 to FUNCTION. Used for C++ multiple inheritance. */
25721 static void
25722 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25723 HOST_WIDE_INT delta,
25724 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25725 tree function)
25727 static int thunk_label = 0;
25728 char label[256];
25729 char labelpc[256];
25730 int mi_delta = delta;
25731 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25732 int shift = 0;
25733 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25734 ? 1 : 0);
25735 if (mi_delta < 0)
25736 mi_delta = - mi_delta;
25738 final_start_function (emit_barrier (), file, 1);
25740 if (TARGET_THUMB1)
25742 int labelno = thunk_label++;
25743 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25744 /* Thunks are entered in ARM mode when available. */
25745 if (TARGET_THUMB1_ONLY)
25747 /* push r3 so we can use it as a temporary. */
25748 /* TODO: Omit this save if r3 is not used. */
25749 fputs ("\tpush {r3}\n", file);
25750 fputs ("\tldr\tr3, ", file);
25752 else
25754 fputs ("\tldr\tr12, ", file);
25756 assemble_name (file, label);
25757 fputc ('\n', file);
25758 if (flag_pic)
25760 /* If we are generating PIC, the ldr instruction below loads
25761 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25762 the address of the add + 8, so we have:
25764 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25765 = target + 1.
25767 Note that we have "+ 1" because some versions of GNU ld
25768 don't set the low bit of the result for R_ARM_REL32
25769 relocations against thumb function symbols.
25770 On ARMv6M this is +4, not +8. */
25771 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25772 assemble_name (file, labelpc);
25773 fputs (":\n", file);
25774 if (TARGET_THUMB1_ONLY)
25776 /* This is 2 insns after the start of the thunk, so we know it
25777 is 4-byte aligned. */
25778 fputs ("\tadd\tr3, pc, r3\n", file);
25779 fputs ("\tmov r12, r3\n", file);
25781 else
25782 fputs ("\tadd\tr12, pc, r12\n", file);
25784 else if (TARGET_THUMB1_ONLY)
25785 fputs ("\tmov r12, r3\n", file);
25787 if (TARGET_THUMB1_ONLY)
25789 if (mi_delta > 255)
25791 fputs ("\tldr\tr3, ", file);
25792 assemble_name (file, label);
25793 fputs ("+4\n", file);
25794 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25795 mi_op, this_regno, this_regno);
25797 else if (mi_delta != 0)
25799 /* Thumb1 unified syntax requires s suffix in instruction name when
25800 one of the operands is immediate. */
25801 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25802 mi_op, this_regno, this_regno,
25803 mi_delta);
25806 else
25808 /* TODO: Use movw/movt for large constants when available. */
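/* The loop below peels off one rotated 8-bit immediate per iteration; a
   hypothetical delta of 0x1234, for instance, is added as #0x234 followed
   by #0x1000, each of which fits an ARM immediate encoding.  */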
25809 while (mi_delta != 0)
25811 if ((mi_delta & (3 << shift)) == 0)
25812 shift += 2;
25813 else
25815 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25816 mi_op, this_regno, this_regno,
25817 mi_delta & (0xff << shift));
25818 mi_delta &= ~(0xff << shift);
25819 shift += 8;
25823 if (TARGET_THUMB1)
25825 if (TARGET_THUMB1_ONLY)
25826 fputs ("\tpop\t{r3}\n", file);
25828 fprintf (file, "\tbx\tr12\n");
25829 ASM_OUTPUT_ALIGN (file, 2);
25830 assemble_name (file, label);
25831 fputs (":\n", file);
25832 if (flag_pic)
25834 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25835 rtx tem = XEXP (DECL_RTL (function), 0);
25836 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25837 pipeline offset is four rather than eight. Adjust the offset
25838 accordingly. */
25839 tem = plus_constant (GET_MODE (tem), tem,
25840 TARGET_THUMB1_ONLY ? -3 : -7);
25841 tem = gen_rtx_MINUS (GET_MODE (tem),
25842 tem,
25843 gen_rtx_SYMBOL_REF (Pmode,
25844 ggc_strdup (labelpc)));
25845 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25847 else
25848 /* Output ".word .LTHUNKn". */
25849 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25851 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25852 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25854 else
25856 fputs ("\tb\t", file);
25857 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25858 if (NEED_PLT_RELOC)
25859 fputs ("(PLT)", file);
25860 fputc ('\n', file);
25863 final_end_function ();
25867 arm_emit_vector_const (FILE *file, rtx x)
25869 int i;
25870 const char * pattern;
25872 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25874 switch (GET_MODE (x))
25876 case V2SImode: pattern = "%08x"; break;
25877 case V4HImode: pattern = "%04x"; break;
25878 case V8QImode: pattern = "%02x"; break;
25879 default: gcc_unreachable ();
25882 fprintf (file, "0x");
25883 for (i = CONST_VECTOR_NUNITS (x); i--;)
25885 rtx element;
25887 element = CONST_VECTOR_ELT (x, i);
25888 fprintf (file, pattern, INTVAL (element));
25891 return 1;
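/* For example, a V4HImode constant vector whose lanes are {1, 2, 3, 4}
   (lane 0 first) is printed by arm_emit_vector_const as
   "0x0004000300020001", highest-numbered lane first.  */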
25894 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25895 HFmode constant pool entries are actually loaded with ldr. */
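/* For example, the value 1.0 (0x3c00 in IEEE half precision) is emitted as
   its two data bytes followed by two bytes of zero padding on a
   little-endian target; on a big-endian target the padding comes first.  */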
25896 void
25897 arm_emit_fp16_const (rtx c)
25899 REAL_VALUE_TYPE r;
25900 long bits;
25902 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25903 bits = real_to_target (NULL, &r, HFmode);
25904 if (WORDS_BIG_ENDIAN)
25905 assemble_zeros (2);
25906 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25907 if (!WORDS_BIG_ENDIAN)
25908 assemble_zeros (2);
25911 const char *
25912 arm_output_load_gr (rtx *operands)
25914 rtx reg;
25915 rtx offset;
25916 rtx wcgr;
25917 rtx sum;
25919 if (!MEM_P (operands [1])
25920 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25921 || !REG_P (reg = XEXP (sum, 0))
25922 || !CONST_INT_P (offset = XEXP (sum, 1))
25923 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25924 return "wldrw%?\t%0, %1";
25926 /* Fix up an out-of-range load of a GR register. */
25927 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25928 wcgr = operands[0];
25929 operands[0] = reg;
25930 output_asm_insn ("ldr%?\t%0, %1", operands);
25932 operands[0] = wcgr;
25933 operands[1] = reg;
25934 output_asm_insn ("tmcr%?\t%0, %1", operands);
25935 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25937 return "";
25940 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25942 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25943 named arg and all anonymous args onto the stack.
25944 XXX I know the prologue shouldn't be pushing registers, but it is faster
25945 that way. */
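/* For example, given a hypothetical prototype such as "int f (int a, ...)"
   under the AAPCS, the single named argument occupies r0, so the prologue
   pushes the remaining argument registers r1-r3 and *pretend_size is set
   to 12 bytes.  */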
25947 static void
25948 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25949 machine_mode mode,
25950 tree type,
25951 int *pretend_size,
25952 int second_time ATTRIBUTE_UNUSED)
25954 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25955 int nregs;
25957 cfun->machine->uses_anonymous_args = 1;
25958 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25960 nregs = pcum->aapcs_ncrn;
25961 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25962 nregs++;
25964 else
25965 nregs = pcum->nregs;
25967 if (nregs < NUM_ARG_REGS)
25968 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25971 /* We can't rely on the caller doing the proper promotion when
25972 using APCS or ATPCS. */
25974 static bool
25975 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25977 return !TARGET_AAPCS_BASED;
25980 static machine_mode
25981 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25982 machine_mode mode,
25983 int *punsignedp ATTRIBUTE_UNUSED,
25984 const_tree fntype ATTRIBUTE_UNUSED,
25985 int for_return ATTRIBUTE_UNUSED)
25987 if (GET_MODE_CLASS (mode) == MODE_INT
25988 && GET_MODE_SIZE (mode) < 4)
25989 return SImode;
25991 return mode;
25994 /* AAPCS based ABIs use short enums by default. */
25996 static bool
25997 arm_default_short_enums (void)
25999 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26003 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26005 static bool
26006 arm_align_anon_bitfield (void)
26008 return TARGET_AAPCS_BASED;
26012 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26014 static tree
26015 arm_cxx_guard_type (void)
26017 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26021 /* The EABI says test the least significant bit of a guard variable. */
26023 static bool
26024 arm_cxx_guard_mask_bit (void)
26026 return TARGET_AAPCS_BASED;
26030 /* The EABI specifies that all array cookies are 8 bytes long. */
26032 static tree
26033 arm_get_cookie_size (tree type)
26035 tree size;
26037 if (!TARGET_AAPCS_BASED)
26038 return default_cxx_get_cookie_size (type);
26040 size = build_int_cst (sizetype, 8);
26041 return size;
26045 /* The EABI says that array cookies should also contain the element size. */
26047 static bool
26048 arm_cookie_has_size (void)
26050 return TARGET_AAPCS_BASED;
26054 /* The EABI says constructors and destructors should return a pointer to
26055 the object constructed/destroyed. */
26057 static bool
26058 arm_cxx_cdtor_returns_this (void)
26060 return TARGET_AAPCS_BASED;
26063 /* The EABI says that an inline function may never be the key
26064 method. */
26066 static bool
26067 arm_cxx_key_method_may_be_inline (void)
26069 return !TARGET_AAPCS_BASED;
26072 static void
26073 arm_cxx_determine_class_data_visibility (tree decl)
26075 if (!TARGET_AAPCS_BASED
26076 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26077 return;
26079 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26080 is exported. However, on systems without dynamic vague linkage,
26081 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26082 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26083 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26084 else
26085 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26086 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26089 static bool
26090 arm_cxx_class_data_always_comdat (void)
26092 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26093 vague linkage if the class has no key function. */
26094 return !TARGET_AAPCS_BASED;
26098 /* The EABI says __aeabi_atexit should be used to register static
26099 destructors. */
26101 static bool
26102 arm_cxx_use_aeabi_atexit (void)
26104 return TARGET_AAPCS_BASED;
26108 void
26109 arm_set_return_address (rtx source, rtx scratch)
26111 arm_stack_offsets *offsets;
26112 HOST_WIDE_INT delta;
26113 rtx addr;
26114 unsigned long saved_regs;
26116 offsets = arm_get_frame_offsets ();
26117 saved_regs = offsets->saved_regs_mask;
26119 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26120 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26121 else
26123 if (frame_pointer_needed)
26124 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26125 else
26127 /* LR will be the first saved register. */
26128 delta = offsets->outgoing_args - (offsets->frame + 4);
26131 if (delta >= 4096)
26133 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26134 GEN_INT (delta & ~4095)));
26135 addr = scratch;
26136 delta &= 4095;
26138 else
26139 addr = stack_pointer_rtx;
26141 addr = plus_constant (Pmode, addr, delta);
26143 /* The store needs to be marked as frame related in order to prevent
26144 DSE from deleting it as dead if it is based on fp. */
26145 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26146 RTX_FRAME_RELATED_P (insn) = 1;
26147 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26152 void
26153 thumb_set_return_address (rtx source, rtx scratch)
26155 arm_stack_offsets *offsets;
26156 HOST_WIDE_INT delta;
26157 HOST_WIDE_INT limit;
26158 int reg;
26159 rtx addr;
26160 unsigned long mask;
26162 emit_use (source);
26164 offsets = arm_get_frame_offsets ();
26165 mask = offsets->saved_regs_mask;
26166 if (mask & (1 << LR_REGNUM))
26168 limit = 1024;
26169 /* Find the saved regs. */
26170 if (frame_pointer_needed)
26172 delta = offsets->soft_frame - offsets->saved_args;
26173 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26174 if (TARGET_THUMB1)
26175 limit = 128;
26177 else
26179 delta = offsets->outgoing_args - offsets->saved_args;
26180 reg = SP_REGNUM;
26182 /* Allow for the stack frame. */
26183 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26184 delta -= 16;
26185 /* The link register is always the first saved register. */
26186 delta -= 4;
26188 /* Construct the address. */
26189 addr = gen_rtx_REG (SImode, reg);
26190 if (delta > limit)
26192 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26193 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26194 addr = scratch;
26196 else
26197 addr = plus_constant (Pmode, addr, delta);
26199 /* The store needs to be marked as frame related in order to prevent
26200 DSE from deleting it as dead if it is based on fp. */
26201 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26202 RTX_FRAME_RELATED_P (insn) = 1;
26203 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26205 else
26206 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26209 /* Implements target hook vector_mode_supported_p. */
26210 bool
26211 arm_vector_mode_supported_p (machine_mode mode)
26213 /* Neon also supports V2SImode, etc. listed in the clause below. */
26214 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26215 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26216 return true;
26218 if ((TARGET_NEON || TARGET_IWMMXT)
26219 && ((mode == V2SImode)
26220 || (mode == V4HImode)
26221 || (mode == V8QImode)))
26222 return true;
26224 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26225 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26226 || mode == V2HAmode))
26227 return true;
26229 return false;
26232 /* Implements target hook array_mode_supported_p. */
26234 static bool
26235 arm_array_mode_supported_p (machine_mode mode,
26236 unsigned HOST_WIDE_INT nelems)
26238 if (TARGET_NEON
26239 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26240 && (nelems >= 2 && nelems <= 4))
26241 return true;
26243 return false;
26246 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26247 registers when autovectorizing for Neon, at least until multiple vector
26248 widths are supported properly by the middle-end. */
26250 static machine_mode
26251 arm_preferred_simd_mode (machine_mode mode)
26253 if (TARGET_NEON)
26254 switch (mode)
26256 case SFmode:
26257 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26258 case SImode:
26259 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26260 case HImode:
26261 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26262 case QImode:
26263 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26264 case DImode:
26265 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26266 return V2DImode;
26267 break;
26269 default:;
26272 if (TARGET_REALLY_IWMMXT)
26273 switch (mode)
26275 case SImode:
26276 return V2SImode;
26277 case HImode:
26278 return V4HImode;
26279 case QImode:
26280 return V8QImode;
26282 default:;
26285 return word_mode;
26288 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26290 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26291 using r0-r4 for function arguments, r7 for the stack frame and not have
26292 enough left over to do doubleword arithmetic. For Thumb-2 all the
26293 potentially problematic instructions accept high registers so this is not
26294 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26295 that require many low registers. */
26296 static bool
26297 arm_class_likely_spilled_p (reg_class_t rclass)
26299 if ((TARGET_THUMB1 && rclass == LO_REGS)
26300 || rclass == CC_REG)
26301 return true;
26303 return false;
26306 /* Implements target hook small_register_classes_for_mode_p. */
26307 bool
26308 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26310 return TARGET_THUMB1;
26313 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26314 ARM insns and therefore guarantee that the shift count is modulo 256.
26315 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26316 guarantee no particular behavior for out-of-range counts. */
26318 static unsigned HOST_WIDE_INT
26319 arm_shift_truncation_mask (machine_mode mode)
26321 return mode == SImode ? 255 : 0;
26325 /* Map internal gcc register numbers to DWARF2 register numbers. */
26327 unsigned int
26328 arm_dbx_register_number (unsigned int regno)
26330 if (regno < 16)
26331 return regno;
26333 if (IS_VFP_REGNUM (regno))
26335 /* See comment in arm_dwarf_register_span. */
26336 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26337 return 64 + regno - FIRST_VFP_REGNUM;
26338 else
26339 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26342 if (IS_IWMMXT_GR_REGNUM (regno))
26343 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26345 if (IS_IWMMXT_REGNUM (regno))
26346 return 112 + regno - FIRST_IWMMXT_REGNUM;
26348 gcc_unreachable ();
26351 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26352 GCC models them as 64 32-bit registers, so we need to describe this to
26353 the DWARF generation code. Other registers can use the default. */
26354 static rtx
26355 arm_dwarf_register_span (rtx rtl)
26357 machine_mode mode;
26358 unsigned regno;
26359 rtx parts[16];
26360 int nregs;
26361 int i;
26363 regno = REGNO (rtl);
26364 if (!IS_VFP_REGNUM (regno))
26365 return NULL_RTX;
26367 /* XXX FIXME: The EABI defines two VFP register ranges:
26368 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26369 256-287: D0-D31
26370 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26371 corresponding D register. Until GDB supports this, we shall use the
26372 legacy encodings. We also use these encodings for D0-D15 for
26373 compatibility with older debuggers. */
26374 mode = GET_MODE (rtl);
26375 if (GET_MODE_SIZE (mode) < 8)
26376 return NULL_RTX;
26378 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26380 nregs = GET_MODE_SIZE (mode) / 4;
26381 for (i = 0; i < nregs; i += 2)
26382 if (TARGET_BIG_END)
26384 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26385 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26387 else
26389 parts[i] = gen_rtx_REG (SImode, regno + i);
26390 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26393 else
26395 nregs = GET_MODE_SIZE (mode) / 8;
26396 for (i = 0; i < nregs; i++)
26397 parts[i] = gen_rtx_REG (DImode, regno + i);
26400 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26403 #if ARM_UNWIND_INFO
26404 /* Emit unwind directives for a store-multiple instruction or stack pointer
26405 push during alignment.
26406 These should only ever be generated by the function prologue code, so
26407 expect them to have a particular form.
26408 The store-multiple instruction sometimes pushes pc as the last register,
26409 although it should not be tracked into unwind information, or for -Os
26410 sometimes pushes some dummy registers before the first register that needs
26411 to be tracked in unwind information; such dummy registers are there just
26412 to avoid separate stack adjustment, and will not be restored in the
26413 epilogue. */
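/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}" and a "vpush {d8, d9}" as ".vsave {d8, d9}"; when
   pc is pushed purely as padding it is described by a ".pad #4" directive
   instead of being listed.  */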
26415 static void
26416 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26418 int i;
26419 HOST_WIDE_INT offset;
26420 HOST_WIDE_INT nregs;
26421 int reg_size;
26422 unsigned reg;
26423 unsigned lastreg;
26424 unsigned padfirst = 0, padlast = 0;
26425 rtx e;
26427 e = XVECEXP (p, 0, 0);
26428 gcc_assert (GET_CODE (e) == SET);
26430 /* First insn will adjust the stack pointer. */
26431 gcc_assert (GET_CODE (e) == SET
26432 && REG_P (SET_DEST (e))
26433 && REGNO (SET_DEST (e)) == SP_REGNUM
26434 && GET_CODE (SET_SRC (e)) == PLUS);
26436 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26437 nregs = XVECLEN (p, 0) - 1;
26438 gcc_assert (nregs);
26440 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26441 if (reg < 16)
26443 /* For -Os dummy registers can be pushed at the beginning to
26444 avoid separate stack pointer adjustment. */
26445 e = XVECEXP (p, 0, 1);
26446 e = XEXP (SET_DEST (e), 0);
26447 if (GET_CODE (e) == PLUS)
26448 padfirst = INTVAL (XEXP (e, 1));
26449 gcc_assert (padfirst == 0 || optimize_size);
26450 /* The function prologue may also push pc, but not annotate it as it is
26451 never restored. We turn this into a stack pointer adjustment. */
26452 e = XVECEXP (p, 0, nregs);
26453 e = XEXP (SET_DEST (e), 0);
26454 if (GET_CODE (e) == PLUS)
26455 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26456 else
26457 padlast = offset - 4;
26458 gcc_assert (padlast == 0 || padlast == 4);
26459 if (padlast == 4)
26460 fprintf (asm_out_file, "\t.pad #4\n");
26461 reg_size = 4;
26462 fprintf (asm_out_file, "\t.save {");
26464 else if (IS_VFP_REGNUM (reg))
26466 reg_size = 8;
26467 fprintf (asm_out_file, "\t.vsave {");
26469 else
26470 /* Unknown register type. */
26471 gcc_unreachable ();
26473 /* If the stack increment doesn't match the size of the saved registers,
26474 something has gone horribly wrong. */
26475 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26477 offset = padfirst;
26478 lastreg = 0;
26479 /* The remaining insns will describe the stores. */
26480 for (i = 1; i <= nregs; i++)
26482 /* Expect (set (mem <addr>) (reg)).
26483 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26484 e = XVECEXP (p, 0, i);
26485 gcc_assert (GET_CODE (e) == SET
26486 && MEM_P (SET_DEST (e))
26487 && REG_P (SET_SRC (e)));
26489 reg = REGNO (SET_SRC (e));
26490 gcc_assert (reg >= lastreg);
26492 if (i != 1)
26493 fprintf (asm_out_file, ", ");
26494 /* We can't use %r for vfp because we need to use the
26495 double precision register names. */
26496 if (IS_VFP_REGNUM (reg))
26497 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26498 else
26499 asm_fprintf (asm_out_file, "%r", reg);
26501 #ifdef ENABLE_CHECKING
26502 /* Check that the addresses are consecutive. */
26503 e = XEXP (SET_DEST (e), 0);
26504 if (GET_CODE (e) == PLUS)
26505 gcc_assert (REG_P (XEXP (e, 0))
26506 && REGNO (XEXP (e, 0)) == SP_REGNUM
26507 && CONST_INT_P (XEXP (e, 1))
26508 && offset == INTVAL (XEXP (e, 1)));
26509 else
26510 gcc_assert (i == 1
26511 && REG_P (e)
26512 && REGNO (e) == SP_REGNUM);
26513 offset += reg_size;
26514 #endif
26516 fprintf (asm_out_file, "}\n");
26517 if (padfirst)
26518 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26521 /* Emit unwind directives for a SET. */
26523 static void
26524 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26526 rtx e0;
26527 rtx e1;
26528 unsigned reg;
26530 e0 = XEXP (p, 0);
26531 e1 = XEXP (p, 1);
26532 switch (GET_CODE (e0))
26534 case MEM:
26535 /* Pushing a single register. */
26536 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26537 || !REG_P (XEXP (XEXP (e0, 0), 0))
26538 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26539 abort ();
26541 asm_fprintf (asm_out_file, "\t.save ");
26542 if (IS_VFP_REGNUM (REGNO (e1)))
26543 asm_fprintf(asm_out_file, "{d%d}\n",
26544 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26545 else
26546 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26547 break;
26549 case REG:
26550 if (REGNO (e0) == SP_REGNUM)
26552 /* A stack increment. */
26553 if (GET_CODE (e1) != PLUS
26554 || !REG_P (XEXP (e1, 0))
26555 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26556 || !CONST_INT_P (XEXP (e1, 1)))
26557 abort ();
26559 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26560 -INTVAL (XEXP (e1, 1)));
26562 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26564 HOST_WIDE_INT offset;
26566 if (GET_CODE (e1) == PLUS)
26568 if (!REG_P (XEXP (e1, 0))
26569 || !CONST_INT_P (XEXP (e1, 1)))
26570 abort ();
26571 reg = REGNO (XEXP (e1, 0));
26572 offset = INTVAL (XEXP (e1, 1));
26573 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26574 HARD_FRAME_POINTER_REGNUM, reg,
26575 offset);
26577 else if (REG_P (e1))
26579 reg = REGNO (e1);
26580 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26581 HARD_FRAME_POINTER_REGNUM, reg);
26583 else
26584 abort ();
26586 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26588 /* Move from sp to reg. */
26589 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26591 else if (GET_CODE (e1) == PLUS
26592 && REG_P (XEXP (e1, 0))
26593 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26594 && CONST_INT_P (XEXP (e1, 1)))
26596 /* Set reg to offset from sp. */
26597 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26598 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26600 else
26601 abort ();
26602 break;
26604 default:
26605 abort ();
26610 /* Emit unwind directives for the given insn. */
26612 static void
26613 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26615 rtx note, pat;
26616 bool handled_one = false;
26618 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26619 return;
26621 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26622 && (TREE_NOTHROW (current_function_decl)
26623 || crtl->all_throwers_are_sibcalls))
26624 return;
26626 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26627 return;
26629 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26631 switch (REG_NOTE_KIND (note))
26633 case REG_FRAME_RELATED_EXPR:
26634 pat = XEXP (note, 0);
26635 goto found;
26637 case REG_CFA_REGISTER:
26638 pat = XEXP (note, 0);
26639 if (pat == NULL)
26641 pat = PATTERN (insn);
26642 if (GET_CODE (pat) == PARALLEL)
26643 pat = XVECEXP (pat, 0, 0);
26646 /* Only emitted for IS_STACKALIGN re-alignment. */
26648 rtx dest, src;
26649 unsigned reg;
26651 src = SET_SRC (pat);
26652 dest = SET_DEST (pat);
26654 gcc_assert (src == stack_pointer_rtx);
26655 reg = REGNO (dest);
26656 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26657 reg + 0x90, reg);
26659 handled_one = true;
26660 break;
26662 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26663 to get correct dwarf information for shrink-wrapping. We should not
26664 emit unwind information for it because these notes are used either for
26665 pretend arguments or to adjust sp and restore registers from the
26666 stack. */
26667 case REG_CFA_DEF_CFA:
26668 case REG_CFA_ADJUST_CFA:
26669 case REG_CFA_RESTORE:
26670 return;
26672 case REG_CFA_EXPRESSION:
26673 case REG_CFA_OFFSET:
26674 /* ??? Only handling here what we actually emit. */
26675 gcc_unreachable ();
26677 default:
26678 break;
26681 if (handled_one)
26682 return;
26683 pat = PATTERN (insn);
26684 found:
26686 switch (GET_CODE (pat))
26688 case SET:
26689 arm_unwind_emit_set (asm_out_file, pat);
26690 break;
26692 case SEQUENCE:
26693 /* Store multiple. */
26694 arm_unwind_emit_sequence (asm_out_file, pat);
26695 break;
26697 default:
26698 abort();
26703 /* Output a reference from a function exception table to the type_info
26704 object X. The EABI specifies that the symbol should be relocated by
26705 an R_ARM_TARGET2 relocation. */
26707 static bool
26708 arm_output_ttype (rtx x)
26710 fputs ("\t.word\t", asm_out_file);
26711 output_addr_const (asm_out_file, x);
26712 /* Use special relocations for symbol references. */
26713 if (!CONST_INT_P (x))
26714 fputs ("(TARGET2)", asm_out_file);
26715 fputc ('\n', asm_out_file);
26717 return TRUE;
26720 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26722 static void
26723 arm_asm_emit_except_personality (rtx personality)
26725 fputs ("\t.personality\t", asm_out_file);
26726 output_addr_const (asm_out_file, personality);
26727 fputc ('\n', asm_out_file);
26730 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26732 static void
26733 arm_asm_init_sections (void)
26735 exception_section = get_unnamed_section (0, output_section_asm_op,
26736 "\t.handlerdata");
26738 #endif /* ARM_UNWIND_INFO */
26740 /* Output unwind directives for the start/end of a function. */
26742 void
26743 arm_output_fn_unwind (FILE * f, bool prologue)
26745 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26746 return;
26748 if (prologue)
26749 fputs ("\t.fnstart\n", f);
26750 else
26752 /* If this function will never be unwound, then mark it as such.
26753 The same condition is used in arm_unwind_emit to suppress
26754 the frame annotations. */
26755 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26756 && (TREE_NOTHROW (current_function_decl)
26757 || crtl->all_throwers_are_sibcalls))
26758 fputs("\t.cantunwind\n", f);
26760 fputs ("\t.fnend\n", f);
26764 static bool
26765 arm_emit_tls_decoration (FILE *fp, rtx x)
26767 enum tls_reloc reloc;
26768 rtx val;
26770 val = XVECEXP (x, 0, 0);
26771 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26773 output_addr_const (fp, val);
26775 switch (reloc)
26777 case TLS_GD32:
26778 fputs ("(tlsgd)", fp);
26779 break;
26780 case TLS_LDM32:
26781 fputs ("(tlsldm)", fp);
26782 break;
26783 case TLS_LDO32:
26784 fputs ("(tlsldo)", fp);
26785 break;
26786 case TLS_IE32:
26787 fputs ("(gottpoff)", fp);
26788 break;
26789 case TLS_LE32:
26790 fputs ("(tpoff)", fp);
26791 break;
26792 case TLS_DESCSEQ:
26793 fputs ("(tlsdesc)", fp);
26794 break;
26795 default:
26796 gcc_unreachable ();
26799 switch (reloc)
26801 case TLS_GD32:
26802 case TLS_LDM32:
26803 case TLS_IE32:
26804 case TLS_DESCSEQ:
26805 fputs (" + (. - ", fp);
26806 output_addr_const (fp, XVECEXP (x, 0, 2));
26807 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26808 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26809 output_addr_const (fp, XVECEXP (x, 0, 3));
26810 fputc (')', fp);
26811 break;
26812 default:
26813 break;
26816 return TRUE;
26819 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26821 static void
26822 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26824 gcc_assert (size == 4);
26825 fputs ("\t.word\t", file);
26826 output_addr_const (file, x);
26827 fputs ("(tlsldo)", file);
26830 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26832 static bool
26833 arm_output_addr_const_extra (FILE *fp, rtx x)
26835 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26836 return arm_emit_tls_decoration (fp, x);
26837 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26839 char label[256];
26840 int labelno = INTVAL (XVECEXP (x, 0, 0));
26842 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26843 assemble_name_raw (fp, label);
26845 return TRUE;
26847 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26849 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26850 if (GOT_PCREL)
26851 fputs ("+.", fp);
26852 fputs ("-(", fp);
26853 output_addr_const (fp, XVECEXP (x, 0, 0));
26854 fputc (')', fp);
26855 return TRUE;
26857 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26859 output_addr_const (fp, XVECEXP (x, 0, 0));
26860 if (GOT_PCREL)
26861 fputs ("+.", fp);
26862 fputs ("-(", fp);
26863 output_addr_const (fp, XVECEXP (x, 0, 1));
26864 fputc (')', fp);
26865 return TRUE;
26867 else if (GET_CODE (x) == CONST_VECTOR)
26868 return arm_emit_vector_const (fp, x);
26870 return FALSE;
26873 /* Output assembly for a shift instruction.
26874 SET_FLAGS determines how the instruction modifies the condition codes.
26875 0 - Do not set condition codes.
26876 1 - Set condition codes.
26877 2 - Use smallest instruction. */
26878 const char *
26879 arm_output_shift(rtx * operands, int set_flags)
26881 char pattern[100];
26882 static const char flag_chars[3] = {'?', '.', '!'};
26883 const char *shift;
26884 HOST_WIDE_INT val;
26885 char c;
26887 c = flag_chars[set_flags];
26888 if (TARGET_UNIFIED_ASM)
26890 shift = shift_op(operands[3], &val);
26891 if (shift)
26893 if (val != -1)
26894 operands[2] = GEN_INT(val);
26895 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26897 else
26898 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26900 else
26901 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26902 output_asm_insn (pattern, operands);
26903 return "";
26906 /* Output assembly for a WMMX immediate shift instruction. */
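/* For example, a DImode shift by 40 is emitted below as two instructions,
   the first shifting by 32 and the second shifting the result by the
   remaining 8.  */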
26907 const char *
26908 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26910 int shift = INTVAL (operands[2]);
26911 char templ[50];
26912 machine_mode opmode = GET_MODE (operands[0]);
26914 gcc_assert (shift >= 0);
26916 /* If the shift value is greater than 63 (for the D qualifier), 31 (for the
26917 W qualifier) or 15 (for the H qualifier), handle the out-of-range shift. */
26918 if (((opmode == V4HImode) && (shift > 15))
26919 || ((opmode == V2SImode) && (shift > 31))
26920 || ((opmode == DImode) && (shift > 63)))
26922 if (wror_or_wsra)
26924 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26925 output_asm_insn (templ, operands);
26926 if (opmode == DImode)
26928 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26929 output_asm_insn (templ, operands);
26932 else
26934 /* The destination register will contain all zeros. */
26935 sprintf (templ, "wzero\t%%0");
26936 output_asm_insn (templ, operands);
26938 return "";
26941 if ((opmode == DImode) && (shift > 32))
26943 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26944 output_asm_insn (templ, operands);
26945 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26946 output_asm_insn (templ, operands);
26948 else
26950 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26951 output_asm_insn (templ, operands);
26953 return "";
26956 /* Output assembly for a WMMX tinsr instruction. */
26957 const char *
26958 arm_output_iwmmxt_tinsr (rtx *operands)
26960 int mask = INTVAL (operands[3]);
26961 int i;
26962 char templ[50];
26963 int units = mode_nunits[GET_MODE (operands[0])];
26964 gcc_assert ((mask & (mask - 1)) == 0);
26965 for (i = 0; i < units; ++i)
26967 if ((mask & 0x01) == 1)
26969 break;
26971 mask >>= 1;
26973 gcc_assert (i < units);
26975 switch (GET_MODE (operands[0]))
26977 case V8QImode:
26978 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26979 break;
26980 case V4HImode:
26981 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26982 break;
26983 case V2SImode:
26984 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26985 break;
26986 default:
26987 gcc_unreachable ();
26988 break;
26990 output_asm_insn (templ, operands);
26992 return "";
26995 /* Output a Thumb-1 casesi dispatch sequence. */
26996 const char *
26997 thumb1_output_casesi (rtx *operands)
26999 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27001 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27003 switch (GET_MODE(diff_vec))
27005 case QImode:
27006 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27007 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27008 case HImode:
27009 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27010 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27011 case SImode:
27012 return "bl\t%___gnu_thumb1_case_si";
27013 default:
27014 gcc_unreachable ();
27018 /* Output a Thumb-2 casesi instruction. */
27019 const char *
27020 thumb2_output_casesi (rtx *operands)
27022 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27024 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27026 output_asm_insn ("cmp\t%0, %1", operands);
27027 output_asm_insn ("bhi\t%l3", operands);
27028 switch (GET_MODE(diff_vec))
27030 case QImode:
27031 return "tbb\t[%|pc, %0]";
27032 case HImode:
27033 return "tbh\t[%|pc, %0, lsl #1]";
27034 case SImode:
27035 if (flag_pic)
27037 output_asm_insn ("adr\t%4, %l2", operands);
27038 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27039 output_asm_insn ("add\t%4, %4, %5", operands);
27040 return "bx\t%4";
27042 else
27044 output_asm_insn ("adr\t%4, %l2", operands);
27045 return "ldr\t%|pc, [%4, %0, lsl #2]";
27047 default:
27048 gcc_unreachable ();
27052 /* Most ARM cores are single issue, but some newer ones can dual issue.
27053 The scheduler descriptions rely on this being correct. */
27054 static int
27055 arm_issue_rate (void)
27057 switch (arm_tune)
27059 case cortexa15:
27060 case cortexa57:
27061 return 3;
27063 case cortexm7:
27064 case cortexr4:
27065 case cortexr4f:
27066 case cortexr5:
27067 case genericv7a:
27068 case cortexa5:
27069 case cortexa7:
27070 case cortexa8:
27071 case cortexa9:
27072 case cortexa12:
27073 case cortexa53:
27074 case fa726te:
27075 case marvell_pj4:
27076 return 2;
27078 default:
27079 return 1;
27083 const char *
27084 arm_mangle_type (const_tree type)
27086 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27087 has to be mangled as if it is in the "std" namespace. */
27088 if (TARGET_AAPCS_BASED
27089 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27090 return "St9__va_list";
27092 /* Half-precision float. */
27093 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27094 return "Dh";
27096 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27097 builtin type. */
27098 if (TYPE_NAME (type) != NULL)
27099 return arm_mangle_builtin_type (type);
27101 /* Use the default mangling. */
27102 return NULL;
27105 /* Order of allocation of core registers for Thumb: this allocation is
27106 written over the corresponding initial entries of the array
27107 initialized with REG_ALLOC_ORDER. We allocate all low registers
27108 first. Saving and restoring a low register is usually cheaper than
27109 using a call-clobbered high register. */
27111 static const int thumb_core_reg_alloc_order[] =
27113 3, 2, 1, 0, 4, 5, 6, 7,
27114 14, 12, 8, 9, 10, 11
27117 /* Adjust register allocation order when compiling for Thumb. */
27119 void
27120 arm_order_regs_for_local_alloc (void)
27122 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27123 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27124 if (TARGET_THUMB)
27125 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27126 sizeof (thumb_core_reg_alloc_order));
27129 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27131 bool
27132 arm_frame_pointer_required (void)
27134 return (cfun->has_nonlocal_label
27135 || SUBTARGET_FRAME_POINTER_REQUIRED
27136 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27139 /* Only Thumb-1 lacks support for conditional execution, so return true
27140 if the target is not Thumb-1. */
27141 static bool
27142 arm_have_conditional_execution (void)
27144 return !TARGET_THUMB1;
27147 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27148 static HOST_WIDE_INT
27149 arm_vector_alignment (const_tree type)
27151 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27153 if (TARGET_AAPCS_BASED)
27154 align = MIN (align, 64);
27156 return align;
27159 static unsigned int
27160 arm_autovectorize_vector_sizes (void)
27162 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27165 static bool
27166 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27168 /* Vectors which aren't in packed structures will not be less aligned than
27169 the natural alignment of their element type, so this is safe. */
27170 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27171 return !is_packed;
27173 return default_builtin_vector_alignment_reachable (type, is_packed);
27176 static bool
27177 arm_builtin_support_vector_misalignment (machine_mode mode,
27178 const_tree type, int misalignment,
27179 bool is_packed)
27181 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27183 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27185 if (is_packed)
27186 return align == 1;
27188 /* If the misalignment is unknown, we should be able to handle the access
27189 so long as it is not to a member of a packed data structure. */
27190 if (misalignment == -1)
27191 return true;
27193 /* Return true if the misalignment is a multiple of the natural alignment
27194 of the vector's element type. This is probably always going to be
27195 true in practice, since we've already established that this isn't a
27196 packed access. */
27197 return ((misalignment % align) == 0);
27200 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27201 is_packed);
27204 static void
27205 arm_conditional_register_usage (void)
27207 int regno;
27209 if (TARGET_THUMB1 && optimize_size)
27211 /* When optimizing for size on Thumb-1, it's better not
27212 to use the HI regs, because of the overhead of
27213 stacking them. */
27214 for (regno = FIRST_HI_REGNUM;
27215 regno <= LAST_HI_REGNUM; ++regno)
27216 fixed_regs[regno] = call_used_regs[regno] = 1;
27219 /* The link register can be clobbered by any branch insn,
27220 but we have no way to track that at present, so mark
27221 it as unavailable. */
27222 if (TARGET_THUMB1)
27223 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27225 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27227 /* VFPv3 registers are disabled when earlier VFP
27228 versions are selected due to the definition of
27229 LAST_VFP_REGNUM. */
27230 for (regno = FIRST_VFP_REGNUM;
27231 regno <= LAST_VFP_REGNUM; ++ regno)
27233 fixed_regs[regno] = 0;
27234 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27235 || regno >= FIRST_VFP_REGNUM + 32;
27239 if (TARGET_REALLY_IWMMXT)
27241 regno = FIRST_IWMMXT_GR_REGNUM;
27242 /* The 2002/10/09 revision of the XScale ABI has wCG0
27243 and wCG1 as call-preserved registers. The 2002/11/21
27244 revision changed this so that all wCG registers are
27245 scratch registers. */
27246 for (regno = FIRST_IWMMXT_GR_REGNUM;
27247 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27248 fixed_regs[regno] = 0;
27249 /* The XScale ABI has wR0 - wR9 as scratch registers,
27250 the rest as call-preserved registers. */
27251 for (regno = FIRST_IWMMXT_REGNUM;
27252 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27254 fixed_regs[regno] = 0;
27255 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27259 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27261 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27262 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27264 else if (TARGET_APCS_STACK)
27266 fixed_regs[10] = 1;
27267 call_used_regs[10] = 1;
27269 /* -mcaller-super-interworking reserves r11 for calls to
27270 _interwork_r11_call_via_rN(). Making the register global
27271 is an easy way of ensuring that it remains valid for all
27272 calls. */
27273 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27274 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27276 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27277 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27278 if (TARGET_CALLER_INTERWORKING)
27279 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27281 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27284 static reg_class_t
27285 arm_preferred_rename_class (reg_class_t rclass)
27287 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27288 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27289 so code size can be reduced. */
27290 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27291 return LO_REGS;
27292 else
27293 return NO_REGS;
27296 /* Compute the attribute "length" of insn "*push_multi".
27297 So this function MUST be kept in sync with that insn pattern. */
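/* For example, in Thumb-2 a "push {r4, r5, lr}" fits the 16-bit encoding
   (length 2), while including a high register such as r8 forces the 32-bit
   encoding (length 4); LR is not counted as a high register here because
   the 16-bit PUSH encoding can store it.  */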
27299 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27301 int i, regno, hi_reg;
27302 int num_saves = XVECLEN (parallel_op, 0);
27304 /* ARM mode. */
27305 if (TARGET_ARM)
27306 return 4;
27307 /* Thumb1 mode. */
27308 if (TARGET_THUMB1)
27309 return 2;
27311 /* Thumb2 mode. */
27312 regno = REGNO (first_op);
27313 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27314 for (i = 1; i < num_saves && !hi_reg; i++)
27316 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27317 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27320 if (!hi_reg)
27321 return 2;
27322 return 4;
27325 /* Compute the number of instructions emitted by output_move_double. */
27327 arm_count_output_move_double_insns (rtx *operands)
27329 int count;
27330 rtx ops[2];
27331 /* output_move_double may modify the operands array, so call it
27332 here on a copy of the array. */
27333 ops[0] = operands[0];
27334 ops[1] = operands[1];
27335 output_move_double (ops, false, &count);
27336 return count;
27340 vfp3_const_double_for_fract_bits (rtx operand)
27342 REAL_VALUE_TYPE r0;
27344 if (!CONST_DOUBLE_P (operand))
27345 return 0;
27347 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27348 if (exact_real_inverse (DFmode, &r0))
27350 if (exact_real_truncate (DFmode, &r0))
27352 HOST_WIDE_INT value = real_to_integer (&r0);
27353 value = value & 0xffffffff;
27354 if ((value != 0) && ( (value & (value - 1)) == 0))
27355 return int_log2 (value);
27358 return 0;
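/* For example, vfp3_const_double_for_fract_bits returns 3 for the constant
   0.125, whose reciprocal (8) is an exact power of two, and 0 for any
   constant whose reciprocal is not a power of two.  */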
27362 vfp3_const_double_for_bits (rtx operand)
27364 REAL_VALUE_TYPE r0;
27366 if (!CONST_DOUBLE_P (operand))
27367 return 0;
27369 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27370 if (exact_real_truncate (DFmode, &r0))
27372 HOST_WIDE_INT value = real_to_integer (&r0);
27373 value = value & 0xffffffff;
27374 if ((value != 0) && ( (value & (value - 1)) == 0))
27375 return int_log2 (value);
27378 return 0;
27381 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27383 static void
27384 arm_pre_atomic_barrier (enum memmodel model)
27386 if (need_atomic_barrier_p (model, true))
27387 emit_insn (gen_memory_barrier ());
27390 static void
27391 arm_post_atomic_barrier (enum memmodel model)
27393 if (need_atomic_barrier_p (model, false))
27394 emit_insn (gen_memory_barrier ());
27397 /* Emit the load-exclusive and store-exclusive instructions.
27398 Use acquire and release versions if necessary. */
27400 static void
27401 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27403 rtx (*gen) (rtx, rtx);
27405 if (acq)
27407 switch (mode)
27409 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27410 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27411 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27412 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27413 default:
27414 gcc_unreachable ();
27417 else
27419 switch (mode)
27421 case QImode: gen = gen_arm_load_exclusiveqi; break;
27422 case HImode: gen = gen_arm_load_exclusivehi; break;
27423 case SImode: gen = gen_arm_load_exclusivesi; break;
27424 case DImode: gen = gen_arm_load_exclusivedi; break;
27425 default:
27426 gcc_unreachable ();
27430 emit_insn (gen (rval, mem));
27433 static void
27434 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27435 rtx mem, bool rel)
27437 rtx (*gen) (rtx, rtx, rtx);
27439 if (rel)
27441 switch (mode)
27443 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27444 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27445 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27446 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27447 default:
27448 gcc_unreachable ();
27451 else
27453 switch (mode)
27455 case QImode: gen = gen_arm_store_exclusiveqi; break;
27456 case HImode: gen = gen_arm_store_exclusivehi; break;
27457 case SImode: gen = gen_arm_store_exclusivesi; break;
27458 case DImode: gen = gen_arm_store_exclusivedi; break;
27459 default:
27460 gcc_unreachable ();
27464 emit_insn (gen (bval, rval, mem));
27467 /* Emit INSN as a jump instruction and mark it as unlikely to be taken. */
27469 static void
27470 emit_unlikely_jump (rtx insn)
27472 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27474 insn = emit_jump_insn (insn);
27475 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27478 /* Expand a compare and swap pattern. */
27480 void
27481 arm_expand_compare_and_swap (rtx operands[])
27483 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27484 machine_mode mode;
27485 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27487 bval = operands[0];
27488 rval = operands[1];
27489 mem = operands[2];
27490 oldval = operands[3];
27491 newval = operands[4];
27492 is_weak = operands[5];
27493 mod_s = operands[6];
27494 mod_f = operands[7];
27495 mode = GET_MODE (mem);
27497 /* Normally the succ memory model must be stronger than fail, but in the
27498 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27499 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27501 if (TARGET_HAVE_LDACQ
27502 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27503 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27504 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27506 switch (mode)
27508 case QImode:
27509 case HImode:
27510 /* For narrow modes, we're going to perform the comparison in SImode,
27511 so do the zero-extension now. */
27512 rval = gen_reg_rtx (SImode);
27513 oldval = convert_modes (SImode, mode, oldval, true);
27514 /* FALLTHRU */
27516 case SImode:
27517 /* Force the value into a register if needed. We waited until after
27518 the zero-extension above to do this properly. */
27519 if (!arm_add_operand (oldval, SImode))
27520 oldval = force_reg (SImode, oldval);
27521 break;
27523 case DImode:
27524 if (!cmpdi_operand (oldval, mode))
27525 oldval = force_reg (mode, oldval);
27526 break;
27528 default:
27529 gcc_unreachable ();
27532 switch (mode)
27534 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27535 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27536 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27537 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27538 default:
27539 gcc_unreachable ();
27542 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27544 if (mode == QImode || mode == HImode)
27545 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27547 /* In all cases, we arrange for success to be signaled by Z set.
27548 This arrangement allows for the boolean result to be used directly
27549 in a subsequent branch, post optimization. */
27550 x = gen_rtx_REG (CCmode, CC_REGNUM);
27551 x = gen_rtx_EQ (SImode, x, const0_rtx);
27552 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27555 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27556 another memory store between the load-exclusive and store-exclusive can
27557 reset the monitor from Exclusive to Open state. This means we must wait
27558 until after reload to split the pattern, lest we get a register spill in
27559 the middle of the atomic sequence. */
27561 void
27562 arm_split_compare_and_swap (rtx operands[])
27564 rtx rval, mem, oldval, newval, scratch;
27565 machine_mode mode;
27566 enum memmodel mod_s, mod_f;
27567 bool is_weak;
27568 rtx_code_label *label1, *label2;
27569 rtx x, cond;
27571 rval = operands[0];
27572 mem = operands[1];
27573 oldval = operands[2];
27574 newval = operands[3];
27575 is_weak = (operands[4] != const0_rtx);
27576 mod_s = (enum memmodel) INTVAL (operands[5]);
27577 mod_f = (enum memmodel) INTVAL (operands[6]);
27578 scratch = operands[7];
27579 mode = GET_MODE (mem);
27581 bool use_acquire = TARGET_HAVE_LDACQ
27582 && !(mod_s == MEMMODEL_RELAXED
27583 || mod_s == MEMMODEL_CONSUME
27584 || mod_s == MEMMODEL_RELEASE);
27586 bool use_release = TARGET_HAVE_LDACQ
27587 && !(mod_s == MEMMODEL_RELAXED
27588 || mod_s == MEMMODEL_CONSUME
27589 || mod_s == MEMMODEL_ACQUIRE);
27591 /* Checks whether a barrier is needed and emits one accordingly. */
27592 if (!(use_acquire || use_release))
27593 arm_pre_atomic_barrier (mod_s);
27595 label1 = NULL;
27596 if (!is_weak)
27598 label1 = gen_label_rtx ();
27599 emit_label (label1);
27601 label2 = gen_label_rtx ();
27603 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27605 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27606 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27607 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27608 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27609 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27611 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27613 /* Weak or strong, we want EQ to be true for success, so that we
27614 match the flags that we got from the compare above. */
27615 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27616 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27617 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27619 if (!is_weak)
27621 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27622 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27623 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27624 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27627 if (mod_f != MEMMODEL_RELAXED)
27628 emit_label (label2);
27630 /* Checks whether a barrier is needed and emits one accordingly. */
27631 if (!(use_acquire || use_release))
27632 arm_post_atomic_barrier (mod_s);
27634 if (mod_f == MEMMODEL_RELAXED)
27635 emit_label (label2);
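/* Split an atomic read-modify-write operation into a load-exclusive /
   store-exclusive retry loop.  In outline, for an atomic add the emitted
   sequence behaves like:

     loop:  old  = load-exclusive (mem);
            new  = old + value;
            cond = store-exclusive (mem, new);
            if (cond != 0) goto loop;

   with barriers or acquire/release accesses added as required by MODEL.  */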
27638 void
27639 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27640 rtx value, rtx model_rtx, rtx cond)
27642 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27643 machine_mode mode = GET_MODE (mem);
27644 machine_mode wmode = (mode == DImode ? DImode : SImode);
27645 rtx_code_label *label;
27646 rtx x;
27648 bool use_acquire = TARGET_HAVE_LDACQ
27649 && !(model == MEMMODEL_RELAXED
27650 || model == MEMMODEL_CONSUME
27651 || model == MEMMODEL_RELEASE);
27653 bool use_release = TARGET_HAVE_LDACQ
27654 && !(model == MEMMODEL_RELAXED
27655 || model == MEMMODEL_CONSUME
27656 || model == MEMMODEL_ACQUIRE);
27658 /* Checks whether a barrier is needed and emits one accordingly. */
27659 if (!(use_acquire || use_release))
27660 arm_pre_atomic_barrier (model);
27662 label = gen_label_rtx ();
27663 emit_label (label);
27665 if (new_out)
27666 new_out = gen_lowpart (wmode, new_out);
27667 if (old_out)
27668 old_out = gen_lowpart (wmode, old_out);
27669 else
27670 old_out = new_out;
27671 value = simplify_gen_subreg (wmode, value, mode, 0);
27673 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27675 switch (code)
27677 case SET:
27678 new_out = value;
27679 break;
27681 case NOT:
27682 x = gen_rtx_AND (wmode, old_out, value);
27683 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27684 x = gen_rtx_NOT (wmode, new_out);
27685 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27686 break;
27688 case MINUS:
27689 if (CONST_INT_P (value))
27691 value = GEN_INT (-INTVAL (value));
27692 code = PLUS;
27694 /* FALLTHRU */
27696 case PLUS:
27697 if (mode == DImode)
27699 /* DImode plus/minus need to clobber flags. */
27700 /* The adddi3 and subdi3 patterns are incorrectly written so that
27701 they require matching operands, even when we could easily support
27702 three operands. Thankfully, this can be fixed up post-splitting,
27703 as the individual add+adc patterns do accept three operands and
27704 post-reload cprop can make these moves go away. */
27705 emit_move_insn (new_out, old_out);
27706 if (code == PLUS)
27707 x = gen_adddi3 (new_out, new_out, value);
27708 else
27709 x = gen_subdi3 (new_out, new_out, value);
27710 emit_insn (x);
27711 break;
27713 /* FALLTHRU */
27715 default:
27716 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27717 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27718 break;
27721 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27722 use_release);
27724 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27725 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27727 /* Checks whether a barrier is needed and emits one accordingly. */
27728 if (!(use_acquire || use_release))
27729 arm_post_atomic_barrier (model);
27732 #define MAX_VECT_LEN 16
27734 struct expand_vec_perm_d
27736 rtx target, op0, op1;
27737 unsigned char perm[MAX_VECT_LEN];
27738 machine_mode vmode;
27739 unsigned char nelt;
27740 bool one_vector_p;
27741 bool testing_p;
27744 /* Generate a variable permutation. */
27746 static void
27747 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27749 machine_mode vmode = GET_MODE (target);
27750 bool one_vector_p = rtx_equal_p (op0, op1);
27752 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27753 gcc_checking_assert (GET_MODE (op0) == vmode);
27754 gcc_checking_assert (GET_MODE (op1) == vmode);
27755 gcc_checking_assert (GET_MODE (sel) == vmode);
27756 gcc_checking_assert (TARGET_NEON);
27758 if (one_vector_p)
27760 if (vmode == V8QImode)
27761 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27762 else
27763 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27765 else
27767 rtx pair;
27769 if (vmode == V8QImode)
27771 pair = gen_reg_rtx (V16QImode);
27772 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27773 pair = gen_lowpart (TImode, pair);
27774 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27776 else
27778 pair = gen_reg_rtx (OImode);
27779 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27780 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27785 void
27786 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27788 machine_mode vmode = GET_MODE (target);
27789 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27790 bool one_vector_p = rtx_equal_p (op0, op1);
27791 rtx rmask[MAX_VECT_LEN], mask;
27793 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27794 numbering of elements for big-endian, we must reverse the order. */
27795 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27797 /* The VTBL instruction does not use a modulo index, so we must take care
27798 of that ourselves. */
27799 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27800 for (i = 0; i < nelt; ++i)
27801 rmask[i] = mask;
27802 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27803 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27805 arm_expand_vec_perm_1 (target, op0, op1, sel);
27808 /* Generate or test for an insn that supports a constant permutation. */
27810 /* Recognize patterns for the VUZP insns. */
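/* For example, on V8QImode with two distinct operands the selector accepted
   here for odd == 0 is { 0, 2, 4, 6, 8, 10, 12, 14 }, i.e. the even-numbered
   elements of the concatenated inputs, which is one output of a VUZP.  */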
27812 static bool
27813 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27815 unsigned int i, odd, mask, nelt = d->nelt;
27816 rtx out0, out1, in0, in1, x;
27817 rtx (*gen)(rtx, rtx, rtx, rtx);
27819 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27820 return false;
27822 /* Note that these are little-endian tests. Adjust for big-endian later. */
27823 if (d->perm[0] == 0)
27824 odd = 0;
27825 else if (d->perm[0] == 1)
27826 odd = 1;
27827 else
27828 return false;
27829 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27831 for (i = 0; i < nelt; i++)
27833 unsigned elt = (i * 2 + odd) & mask;
27834 if (d->perm[i] != elt)
27835 return false;
27838 /* Success! */
27839 if (d->testing_p)
27840 return true;
27842 switch (d->vmode)
27844 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27845 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27846 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27847 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27848 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27849 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27850 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27851 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27852 default:
27853 gcc_unreachable ();
27856 in0 = d->op0;
27857 in1 = d->op1;
27858 if (BYTES_BIG_ENDIAN)
27860 x = in0, in0 = in1, in1 = x;
27861 odd = !odd;
27864 out0 = d->target;
27865 out1 = gen_reg_rtx (d->vmode);
27866 if (odd)
27867 x = out0, out0 = out1, out1 = x;
27869 emit_insn (gen (out0, in0, in1, out1));
27870 return true;
27873 /* Recognize patterns for the VZIP insns. */
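/* For example, on V8QImode with two distinct operands the selector accepted
   here for high == 0 is { 0, 8, 1, 9, 2, 10, 3, 11 }, i.e. the low halves of
   the two inputs interleaved, which is one output of a VZIP.  */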
27875 static bool
27876 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27878 unsigned int i, high, mask, nelt = d->nelt;
27879 rtx out0, out1, in0, in1, x;
27880 rtx (*gen)(rtx, rtx, rtx, rtx);
27882 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27883 return false;
27885 /* Note that these are little-endian tests. Adjust for big-endian later. */
27886 high = nelt / 2;
27887 if (d->perm[0] == high)
27889 else if (d->perm[0] == 0)
27890 high = 0;
27891 else
27892 return false;
27893 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27895 for (i = 0; i < nelt / 2; i++)
27897 unsigned elt = (i + high) & mask;
27898 if (d->perm[i * 2] != elt)
27899 return false;
27900 elt = (elt + nelt) & mask;
27901 if (d->perm[i * 2 + 1] != elt)
27902 return false;
27905 /* Success! */
27906 if (d->testing_p)
27907 return true;
27909 switch (d->vmode)
27911 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27912 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27913 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27914 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27915 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27916 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27917 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27918 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27919 default:
27920 gcc_unreachable ();
27923 in0 = d->op0;
27924 in1 = d->op1;
27925 if (BYTES_BIG_ENDIAN)
27927 x = in0, in0 = in1, in1 = x;
27928 high = !high;
27931 out0 = d->target;
27932 out1 = gen_reg_rtx (d->vmode);
27933 if (high)
27934 x = out0, out0 = out1, out1 = x;
27936 emit_insn (gen (out0, in0, in1, out1));
27937 return true;
27940 /* Recognize patterns for the VREV insns. */
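/* For example, diff == 3 on V8QImode corresponds to the selector
   { 3, 2, 1, 0, 7, 6, 5, 4 }, i.e. a byte reversal within each 32-bit
   group, which the switch below maps to gen_neon_vrev32v8qi.  */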
27942 static bool
27943 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27945 unsigned int i, j, diff, nelt = d->nelt;
27946 rtx (*gen)(rtx, rtx);
27948 if (!d->one_vector_p)
27949 return false;
27951 diff = d->perm[0];
27952 switch (diff)
27954 case 7:
27955 switch (d->vmode)
27957 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27958 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27959 default:
27960 return false;
27962 break;
27963 case 3:
27964 switch (d->vmode)
27966 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27967 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27968 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27969 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27970 default:
27971 return false;
27973 break;
27974 case 1:
27975 switch (d->vmode)
27977 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27978 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27979 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27980 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27981 case V4SImode: gen = gen_neon_vrev64v4si; break;
27982 case V2SImode: gen = gen_neon_vrev64v2si; break;
27983 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27984 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27985 default:
27986 return false;
27988 break;
27989 default:
27990 return false;
27993 for (i = 0; i < nelt ; i += diff + 1)
27994 for (j = 0; j <= diff; j += 1)
27996 /* This is guaranteed to be true, as the value of diff
27997 is 7, 3 or 1 and we should have enough elements in the
27998 queue to generate this. Getting a vector mask with a
27999 value of diff other than these values implies that
28000 something is wrong by the time we get here. */
28001 gcc_assert (i + j < nelt);
28002 if (d->perm[i + j] != i + diff - j)
28003 return false;
28006 /* Success! */
28007 if (d->testing_p)
28008 return true;
28010 emit_insn (gen (d->target, d->op0));
28011 return true;
28014 /* Recognize patterns for the VTRN insns. */
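/* For example, on V8QImode with two distinct operands the selector accepted
   here for odd == 0 is { 0, 8, 2, 10, 4, 12, 6, 14 }, i.e. the even-numbered
   elements of both inputs paired up, which is one output of a VTRN.  */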
28016 static bool
28017 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28019 unsigned int i, odd, mask, nelt = d->nelt;
28020 rtx out0, out1, in0, in1, x;
28021 rtx (*gen)(rtx, rtx, rtx, rtx);
28023 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28024 return false;
28026 /* Note that these are little-endian tests. Adjust for big-endian later. */
28027 if (d->perm[0] == 0)
28028 odd = 0;
28029 else if (d->perm[0] == 1)
28030 odd = 1;
28031 else
28032 return false;
28033 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28035 for (i = 0; i < nelt; i += 2)
28037 if (d->perm[i] != i + odd)
28038 return false;
28039 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28040 return false;
28043 /* Success! */
28044 if (d->testing_p)
28045 return true;
28047 switch (d->vmode)
28049 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28050 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28051 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28052 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28053 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28054 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28055 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28056 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28057 default:
28058 gcc_unreachable ();
28061 in0 = d->op0;
28062 in1 = d->op1;
28063 if (BYTES_BIG_ENDIAN)
28065 x = in0, in0 = in1, in1 = x;
28066 odd = !odd;
28069 out0 = d->target;
28070 out1 = gen_reg_rtx (d->vmode);
28071 if (odd)
28072 x = out0, out0 = out1, out1 = x;
28074 emit_insn (gen (out0, in0, in1, out1));
28075 return true;
28078 /* Recognize patterns for the VEXT insns. */
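/* For example, the selector { 1, 2, 3, 4, 5, 6, 7, 8 } on V8QImode extracts
   eight consecutive bytes starting at element 1 of the concatenated
   operands, which a single VEXT with an immediate of 1 can perform.  */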
28080 static bool
28081 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28083 unsigned int i, nelt = d->nelt;
28084 rtx (*gen) (rtx, rtx, rtx, rtx);
28085 rtx offset;
28087 unsigned int location;
28089 unsigned int next = d->perm[0] + 1;
28091 /* TODO: Handle GCC's numbering of elements for big-endian. */
28092 if (BYTES_BIG_ENDIAN)
28093 return false;
28095 /* Check if the extracted indexes are increasing by one. */
28096 for (i = 1; i < nelt; next++, i++)
28098 /* If we hit the most significant element of the 2nd vector in
28099 the previous iteration, no need to test further. */
28100 if (next == 2 * nelt)
28101 return false;
28103 /* If we are operating on only one vector, it could be a
28104 rotation. If there are only two elements of size < 64, let
28105 arm_evpc_neon_vrev catch it. */
28106 if (d->one_vector_p && (next == nelt))
28108 if ((nelt == 2) && (d->vmode != V2DImode))
28109 return false;
28110 else
28111 next = 0;
28114 if (d->perm[i] != next)
28115 return false;
28118 location = d->perm[0];
28120 switch (d->vmode)
28122 case V16QImode: gen = gen_neon_vextv16qi; break;
28123 case V8QImode: gen = gen_neon_vextv8qi; break;
28124 case V4HImode: gen = gen_neon_vextv4hi; break;
28125 case V8HImode: gen = gen_neon_vextv8hi; break;
28126 case V2SImode: gen = gen_neon_vextv2si; break;
28127 case V4SImode: gen = gen_neon_vextv4si; break;
28128 case V2SFmode: gen = gen_neon_vextv2sf; break;
28129 case V4SFmode: gen = gen_neon_vextv4sf; break;
28130 case V2DImode: gen = gen_neon_vextv2di; break;
28131 default:
28132 return false;
28135 /* Success! */
28136 if (d->testing_p)
28137 return true;
28139 offset = GEN_INT (location);
28140 emit_insn (gen (d->target, d->op0, d->op1, offset));
28141 return true;
28144 /* The NEON VTBL instruction is a fully variable permutation that's even
28145 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28146 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28147 can do slightly better by expanding this as a constant where we don't
28148 have to apply a mask. */
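/* For example, a selector such as { 0, 3, 1, 2, 7, 5, 6, 4 } on V8QImode
   matches none of the patterns above and ends up here, where it is emitted
   as a table lookup driven by a constant index vector.  */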
28150 static bool
28151 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28153 rtx rperm[MAX_VECT_LEN], sel;
28154 machine_mode vmode = d->vmode;
28155 unsigned int i, nelt = d->nelt;
28157 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28158 numbering of elements for big-endian, we must reverse the order. */
28159 if (BYTES_BIG_ENDIAN)
28160 return false;
28162 if (d->testing_p)
28163 return true;
28165 /* Generic code will try constant permutation twice: once with the
28166 original mode and again with the elements lowered to QImode.
28167 So wait and don't do the selector expansion ourselves. */
28168 if (vmode != V8QImode && vmode != V16QImode)
28169 return false;
28171 for (i = 0; i < nelt; ++i)
28172 rperm[i] = GEN_INT (d->perm[i]);
28173 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28174 sel = force_reg (vmode, sel);
28176 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28177 return true;
28180 static bool
28181 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28183 /* Check if the input mask matches vext before reordering the
28184 operands. */
28185 if (TARGET_NEON)
28186 if (arm_evpc_neon_vext (d))
28187 return true;
28189 /* The pattern matching functions above are written to look for a small
28190 number to begin the sequence (0, 1, N/2). If we begin with an index
28191 from the second operand, we can swap the operands. */
28192 if (d->perm[0] >= d->nelt)
28194 unsigned i, nelt = d->nelt;
28195 rtx x;
28197 for (i = 0; i < nelt; ++i)
28198 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28200 x = d->op0;
28201 d->op0 = d->op1;
28202 d->op1 = x;
28205 if (TARGET_NEON)
28207 if (arm_evpc_neon_vuzp (d))
28208 return true;
28209 if (arm_evpc_neon_vzip (d))
28210 return true;
28211 if (arm_evpc_neon_vrev (d))
28212 return true;
28213 if (arm_evpc_neon_vtrn (d))
28214 return true;
28215 return arm_evpc_neon_vtbl (d);
28217 return false;
28220 /* Expand a vec_perm_const pattern. */
28222 bool
28223 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28225 struct expand_vec_perm_d d;
28226 int i, nelt, which;
28228 d.target = target;
28229 d.op0 = op0;
28230 d.op1 = op1;
28232 d.vmode = GET_MODE (target);
28233 gcc_assert (VECTOR_MODE_P (d.vmode));
28234 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28235 d.testing_p = false;
28237 for (i = which = 0; i < nelt; ++i)
28239 rtx e = XVECEXP (sel, 0, i);
28240 int ei = INTVAL (e) & (2 * nelt - 1);
28241 which |= (ei < nelt ? 1 : 2);
28242 d.perm[i] = ei;
28245 switch (which)
28247 default:
28248 gcc_unreachable();
28250 case 3:
28251 d.one_vector_p = false;
28252 if (!rtx_equal_p (op0, op1))
28253 break;
28255 /* The elements of PERM do not suggest that only the first operand
28256 is used, but both operands are identical. Allow easier matching
28257 of the permutation by folding the permutation into the single
28258 input vector. */
28259 /* FALLTHRU */
28260 case 2:
28261 for (i = 0; i < nelt; ++i)
28262 d.perm[i] &= nelt - 1;
28263 d.op0 = op1;
28264 d.one_vector_p = true;
28265 break;
28267 case 1:
28268 d.op1 = op0;
28269 d.one_vector_p = true;
28270 break;
28273 return arm_expand_vec_perm_const_1 (&d);
28276 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28278 static bool
28279 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28280 const unsigned char *sel)
28282 struct expand_vec_perm_d d;
28283 unsigned int i, nelt, which;
28284 bool ret;
28286 d.vmode = vmode;
28287 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28288 d.testing_p = true;
28289 memcpy (d.perm, sel, nelt);
28291 /* Categorize the set of elements in the selector. */
28292 for (i = which = 0; i < nelt; ++i)
28294 unsigned char e = d.perm[i];
28295 gcc_assert (e < 2 * nelt);
28296 which |= (e < nelt ? 1 : 2);
28299 /* For all elements from second vector, fold the elements to first. */
28300 if (which == 2)
28301 for (i = 0; i < nelt; ++i)
28302 d.perm[i] -= nelt;
28304 /* Check whether the mask can be applied to the vector type. */
28305 d.one_vector_p = (which != 3);
28307 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28308 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28309 if (!d.one_vector_p)
28310 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28312 start_sequence ();
28313 ret = arm_expand_vec_perm_const_1 (&d);
28314 end_sequence ();
28316 return ret;
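/* Return TRUE if the auto-increment form CODE is usable for MODE.  For
   example, post-increment of a vector mode is allowed (NEON structure
   loads and stores have post-increment forms), while pre-decrement of a
   vector mode is rejected.  */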
28319 bool
28320 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28322 /* If we are soft float and we either have LDRD or the mode is no
28323 wider than a word, then all auto increment forms are ok. */
28324 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28325 return true;
28327 switch (code)
28329 /* Post increment and pre decrement are supported for all
28330 instruction forms; for vector modes only post increment is allowed. */
28331 case ARM_POST_INC:
28332 case ARM_PRE_DEC:
28333 if (VECTOR_MODE_P (mode))
28335 if (code != ARM_PRE_DEC)
28336 return true;
28337 else
28338 return false;
28341 return true;
28343 case ARM_POST_DEC:
28344 case ARM_PRE_INC:
28345 /* Without LDRD, when the mode size is greater than the
28346 word size there is no point in auto-incrementing because
28347 ldm and stm will not have these forms. */
28348 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28349 return false;
28351 /* Vector and floating point modes do not support
28352 these auto increment forms. */
28353 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28354 return false;
28356 return true;
28358 default:
28359 return false;
28363 return false;
28366 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28367 on ARM, since we know that shifts by negative amounts are no-ops.
28368 Additionally, the default expansion code is not available or suitable
28369 for post-reload insn splits (this can occur when the register allocator
28370 chooses not to do a shift in NEON).
28372 This function is used in both initial expand and post-reload splits, and
28373 handles all kinds of 64-bit shifts.
28375 Input requirements:
28376 - It is safe for the input and output to be the same register, but
28377 early-clobber rules apply for the shift amount and scratch registers.
28378 - Shift by register requires both scratch registers. In all other cases
28379 the scratch registers may be NULL.
28380 - Ashiftrt by a register also clobbers the CC register. */
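/* For example, a logical right shift of a DImode value by a constant 10
   on the constant path below decomposes into:

     out_low  = (in_low >> 10) | (in_high << 22);
     out_high = in_high >> 10;

   while the shift-by-register path builds the equivalent using the
   "amount - 32" and "32 - amount" scratch values.  */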
28381 void
28382 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28383 rtx amount, rtx scratch1, rtx scratch2)
28385 rtx out_high = gen_highpart (SImode, out);
28386 rtx out_low = gen_lowpart (SImode, out);
28387 rtx in_high = gen_highpart (SImode, in);
28388 rtx in_low = gen_lowpart (SImode, in);
28390 /* Terminology:
28391 in = the register pair containing the input value.
28392 out = the destination register pair.
28393 up = the high- or low-part of each pair.
28394 down = the opposite part to "up".
28395 In a shift, we can consider bits to shift from "up"-stream to
28396 "down"-stream, so in a left-shift "up" is the low-part and "down"
28397 is the high-part of each register pair. */
28399 rtx out_up = code == ASHIFT ? out_low : out_high;
28400 rtx out_down = code == ASHIFT ? out_high : out_low;
28401 rtx in_up = code == ASHIFT ? in_low : in_high;
28402 rtx in_down = code == ASHIFT ? in_high : in_low;
28404 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28405 gcc_assert (out
28406 && (REG_P (out) || GET_CODE (out) == SUBREG)
28407 && GET_MODE (out) == DImode);
28408 gcc_assert (in
28409 && (REG_P (in) || GET_CODE (in) == SUBREG)
28410 && GET_MODE (in) == DImode);
28411 gcc_assert (amount
28412 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28413 && GET_MODE (amount) == SImode)
28414 || CONST_INT_P (amount)));
28415 gcc_assert (scratch1 == NULL
28416 || (GET_CODE (scratch1) == SCRATCH)
28417 || (GET_MODE (scratch1) == SImode
28418 && REG_P (scratch1)));
28419 gcc_assert (scratch2 == NULL
28420 || (GET_CODE (scratch2) == SCRATCH)
28421 || (GET_MODE (scratch2) == SImode
28422 && REG_P (scratch2)));
28423 gcc_assert (!REG_P (out) || !REG_P (amount)
28424 || !HARD_REGISTER_P (out)
28425 || (REGNO (out) != REGNO (amount)
28426 && REGNO (out) + 1 != REGNO (amount)));
28428 /* Macros to make following code more readable. */
28429 #define SUB_32(DEST,SRC) \
28430 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28431 #define RSB_32(DEST,SRC) \
28432 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28433 #define SUB_S_32(DEST,SRC) \
28434 gen_addsi3_compare0 ((DEST), (SRC), \
28435 GEN_INT (-32))
28436 #define SET(DEST,SRC) \
28437 gen_rtx_SET (SImode, (DEST), (SRC))
28438 #define SHIFT(CODE,SRC,AMOUNT) \
28439 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28440 #define LSHIFT(CODE,SRC,AMOUNT) \
28441 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28442 SImode, (SRC), (AMOUNT))
28443 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28444 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28445 SImode, (SRC), (AMOUNT))
28446 #define ORR(A,B) \
28447 gen_rtx_IOR (SImode, (A), (B))
28448 #define BRANCH(COND,LABEL) \
28449 gen_arm_cond_branch ((LABEL), \
28450 gen_rtx_ ## COND (CCmode, cc_reg, \
28451 const0_rtx), \
28452 cc_reg)
28454 /* Shifts by register and shifts by constant are handled separately. */
28455 if (CONST_INT_P (amount))
28457 /* We have a shift-by-constant. */
28459 /* First, handle out-of-range shift amounts.
28460 In both cases we try to match the result an ARM instruction in a
28461 shift-by-register would give. This helps reduce execution
28462 differences between optimization levels, but it won't stop other
28463 parts of the compiler doing different things. This is "undefined
28464 behaviour", in any case. */
28465 if (INTVAL (amount) <= 0)
28466 emit_insn (gen_movdi (out, in));
28467 else if (INTVAL (amount) >= 64)
28469 if (code == ASHIFTRT)
28471 rtx const31_rtx = GEN_INT (31);
28472 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28473 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28475 else
28476 emit_insn (gen_movdi (out, const0_rtx));
28479 /* Now handle valid shifts. */
28480 else if (INTVAL (amount) < 32)
28482 /* Shifts by a constant less than 32. */
28483 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28485 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28486 emit_insn (SET (out_down,
28487 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28488 out_down)));
28489 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28491 else
28493 /* Shifts by a constant greater than 31. */
28494 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28496 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28497 if (code == ASHIFTRT)
28498 emit_insn (gen_ashrsi3 (out_up, in_up,
28499 GEN_INT (31)));
28500 else
28501 emit_insn (SET (out_up, const0_rtx));
28504 else
28506 /* We have a shift-by-register. */
28507 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28509 /* This alternative requires the scratch registers. */
28510 gcc_assert (scratch1 && REG_P (scratch1));
28511 gcc_assert (scratch2 && REG_P (scratch2));
28513 /* We will need the values "amount-32" and "32-amount" later.
28514 Swapping them around now allows the later code to be more general. */
28515 switch (code)
28517 case ASHIFT:
28518 emit_insn (SUB_32 (scratch1, amount));
28519 emit_insn (RSB_32 (scratch2, amount));
28520 break;
28521 case ASHIFTRT:
28522 emit_insn (RSB_32 (scratch1, amount));
28523 /* Also set CC = amount > 32. */
28524 emit_insn (SUB_S_32 (scratch2, amount));
28525 break;
28526 case LSHIFTRT:
28527 emit_insn (RSB_32 (scratch1, amount));
28528 emit_insn (SUB_32 (scratch2, amount));
28529 break;
28530 default:
28531 gcc_unreachable ();
28534 /* Emit code like this:
28536 arithmetic-left:
28537 out_down = in_down << amount;
28538 out_down = (in_up << (amount - 32)) | out_down;
28539 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28540 out_up = in_up << amount;
28542 arithmetic-right:
28543 out_down = in_down >> amount;
28544 out_down = (in_up << (32 - amount)) | out_down;
28545 if (amount < 32)
28546 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28547 out_up = in_up << amount;
28549 logical-right:
28550 out_down = in_down >> amount;
28551 out_down = (in_up << (32 - amount)) | out_down;
28552 if (amount < 32)
28553 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28554 out_up = in_up << amount;
28556 The ARM and Thumb2 variants are the same but implemented slightly
28557 differently. If this were only called during expand we could just
28558 use the Thumb2 case and let combine do the right thing, but this
28559 can also be called from post-reload splitters. */
28561 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28563 if (!TARGET_THUMB2)
28565 /* Emit code for ARM mode. */
28566 emit_insn (SET (out_down,
28567 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28568 if (code == ASHIFTRT)
28570 rtx_code_label *done_label = gen_label_rtx ();
28571 emit_jump_insn (BRANCH (LT, done_label));
28572 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28573 out_down)));
28574 emit_label (done_label);
28576 else
28577 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28578 out_down)));
28580 else
28582 /* Emit code for Thumb2 mode.
28583 Thumb2 can't do shift and or in one insn. */
28584 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28585 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28587 if (code == ASHIFTRT)
28589 rtx_code_label *done_label = gen_label_rtx ();
28590 emit_jump_insn (BRANCH (LT, done_label));
28591 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28592 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28593 emit_label (done_label);
28595 else
28597 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28598 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28602 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28605 #undef SUB_32
28606 #undef RSB_32
28607 #undef SUB_S_32
28608 #undef SET
28609 #undef SHIFT
28610 #undef LSHIFT
28611 #undef REV_LSHIFT
28612 #undef ORR
28613 #undef BRANCH
28617 /* Return true if COMPARISON is a valid comparison operation, and
28618 put its operands into a form that is valid for it. */
28619 bool
28620 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28622 enum rtx_code code = GET_CODE (*comparison);
28623 int code_int;
28624 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28625 ? GET_MODE (*op2) : GET_MODE (*op1);
28627 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28629 if (code == UNEQ || code == LTGT)
28630 return false;
28632 code_int = (int)code;
28633 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28634 PUT_CODE (*comparison, (enum rtx_code)code_int);
28636 switch (mode)
28638 case SImode:
28639 if (!arm_add_operand (*op1, mode))
28640 *op1 = force_reg (mode, *op1);
28641 if (!arm_add_operand (*op2, mode))
28642 *op2 = force_reg (mode, *op2);
28643 return true;
28645 case DImode:
28646 if (!cmpdi_operand (*op1, mode))
28647 *op1 = force_reg (mode, *op1);
28648 if (!cmpdi_operand (*op2, mode))
28649 *op2 = force_reg (mode, *op2);
28650 return true;
28652 case SFmode:
28653 case DFmode:
28654 if (!arm_float_compare_operand (*op1, mode))
28655 *op1 = force_reg (mode, *op1);
28656 if (!arm_float_compare_operand (*op2, mode))
28657 *op2 = force_reg (mode, *op2);
28658 return true;
28659 default:
28660 break;
28663 return false;
28667 /* Maximum number of instructions to set block of memory. */
28668 static int
28669 arm_block_set_max_insns (void)
28671 if (optimize_function_for_size_p (cfun))
28672 return 4;
28673 else
28674 return current_tune->max_insns_inline_memset;
28677 /* Return TRUE if it's profitable to set a block of memory for the
28678 non-vectorized case. VAL is the value to set the memory
28679 with. LENGTH is the number of bytes to set. ALIGN is the
28680 alignment of the destination memory in bytes. UNALIGNED_P
28681 is TRUE if we can only set the memory with instructions
28682 meeting alignment requirements. USE_STRD_P is TRUE if we
28683 can use strd to set the memory. */
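/* For example, with a word-aligned destination, LENGTH == 15 and no strd,
   the count below is the cost of loading the constant plus (15 >> 2) == 3
   word stores plus leftover[3] == 2 trailing stores, with one store saved
   when unaligned access lets the final strh/strb pair become a single str.  */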
28684 static bool
28685 arm_block_set_non_vect_profit_p (rtx val,
28686 unsigned HOST_WIDE_INT length,
28687 unsigned HOST_WIDE_INT align,
28688 bool unaligned_p, bool use_strd_p)
28690 int num = 0;
28691 /* For a leftover of 0-7 bytes, we can set the memory block using
28692 strb/strh/str with the minimum number of instructions. */
28693 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28695 if (unaligned_p)
28697 num = arm_const_inline_cost (SET, val);
28698 num += length / align + length % align;
28700 else if (use_strd_p)
28702 num = arm_const_double_inline_cost (val);
28703 num += (length >> 3) + leftover[length & 7];
28705 else
28707 num = arm_const_inline_cost (SET, val);
28708 num += (length >> 2) + leftover[length & 3];
28711 /* We may be able to combine last pair STRH/STRB into a single STR
28712 by shifting one byte back. */
28713 if (unaligned_access && length > 3 && (length & 3) == 3)
28714 num--;
28716 return (num <= arm_block_set_max_insns ());
28719 /* Return TRUE if it's profitable to set a block of memory for the
28720 vectorized case. LENGTH is the number of bytes to set.
28721 ALIGN is the alignment of destination memory in bytes.
28722 MODE is the vector mode used to set the memory. */
28723 static bool
28724 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28725 unsigned HOST_WIDE_INT align,
28726 machine_mode mode)
28728 int num;
28729 bool unaligned_p = ((align & 3) != 0);
28730 unsigned int nelt = GET_MODE_NUNITS (mode);
28732 /* Instruction loading constant value. */
28733 num = 1;
28734 /* Instructions storing the memory. */
28735 num += (length + nelt - 1) / nelt;
28736 /* Instructions adjusting the address expression. We only need to
28737 adjust the address expression if it's 4-byte aligned and the
28738 leftover bytes can only be stored by a misaligned store instruction. */
28739 if (!unaligned_p && (length & 3) != 0)
28740 num++;
28742 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28743 if (!unaligned_p && mode == V16QImode)
28744 num--;
28746 return (num <= arm_block_set_max_insns ());
28749 /* Set a block of memory using vectorization instructions for the
28750 unaligned case. We fill the first LENGTH bytes of the memory
28751 area starting from DSTBASE with byte constant VALUE. ALIGN is
28752 the alignment requirement of memory. Return TRUE if succeeded. */
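/* For example, with LENGTH == 23 and a byte-aligned destination this emits
   one misaligned v16qi store covering bytes 0-15 and then a second,
   overlapping v8qi store covering bytes 15-22, so no scalar tail is
   needed.  */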
28753 static bool
28754 arm_block_set_unaligned_vect (rtx dstbase,
28755 unsigned HOST_WIDE_INT length,
28756 unsigned HOST_WIDE_INT value,
28757 unsigned HOST_WIDE_INT align)
28759 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28760 rtx dst, mem;
28761 rtx val_elt, val_vec, reg;
28762 rtx rval[MAX_VECT_LEN];
28763 rtx (*gen_func) (rtx, rtx);
28764 machine_mode mode;
28765 unsigned HOST_WIDE_INT v = value;
28767 gcc_assert ((align & 0x3) != 0);
28768 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28769 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28770 if (length >= nelt_v16)
28772 mode = V16QImode;
28773 gen_func = gen_movmisalignv16qi;
28775 else
28777 mode = V8QImode;
28778 gen_func = gen_movmisalignv8qi;
28780 nelt_mode = GET_MODE_NUNITS (mode);
28781 gcc_assert (length >= nelt_mode);
28782 /* Skip if it isn't profitable. */
28783 if (!arm_block_set_vect_profit_p (length, align, mode))
28784 return false;
28786 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28787 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28789 v = sext_hwi (v, BITS_PER_WORD);
28790 val_elt = GEN_INT (v);
28791 for (j = 0; j < nelt_mode; j++)
28792 rval[j] = val_elt;
28794 reg = gen_reg_rtx (mode);
28795 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28796 /* Emit instruction loading the constant value. */
28797 emit_move_insn (reg, val_vec);
28799 /* Handle nelt_mode bytes in a vector. */
28800 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28802 emit_insn ((*gen_func) (mem, reg));
28803 if (i + 2 * nelt_mode <= length)
28804 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28807 /* If at least nelt_v8 bytes are left over, we must be in
28808 V16QImode. */
28809 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28811 /* Handle (8, 16) bytes leftover. */
28812 if (i + nelt_v8 < length)
28814 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28815 /* We are shifting bytes back, set the alignment accordingly. */
28816 if ((length & 1) != 0 && align >= 2)
28817 set_mem_align (mem, BITS_PER_UNIT);
28819 emit_insn (gen_movmisalignv16qi (mem, reg));
28821 /* Handle (0, 8] bytes leftover. */
28822 else if (i < length && i + nelt_v8 >= length)
28824 if (mode == V16QImode)
28826 reg = gen_lowpart (V8QImode, reg);
28827 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28829 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28830 + (nelt_mode - nelt_v8))));
28831 /* We are shifting bytes back, set the alignment accordingly. */
28832 if ((length & 1) != 0 && align >= 2)
28833 set_mem_align (mem, BITS_PER_UNIT);
28835 emit_insn (gen_movmisalignv8qi (mem, reg));
28838 return true;
28841 /* Set a block of memory using vectorization instructions for the
28842 aligned case. We fill the first LENGTH bytes of the memory area
28843 starting from DSTBASE with byte constant VALUE. ALIGN is the
28844 alignment requirement of memory. Return TRUE if succeeded. */
28845 static bool
28846 arm_block_set_aligned_vect (rtx dstbase,
28847 unsigned HOST_WIDE_INT length,
28848 unsigned HOST_WIDE_INT value,
28849 unsigned HOST_WIDE_INT align)
28851 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28852 rtx dst, addr, mem;
28853 rtx val_elt, val_vec, reg;
28854 rtx rval[MAX_VECT_LEN];
28855 machine_mode mode;
28856 unsigned HOST_WIDE_INT v = value;
28858 gcc_assert ((align & 0x3) == 0);
28859 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28860 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28861 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28862 mode = V16QImode;
28863 else
28864 mode = V8QImode;
28866 nelt_mode = GET_MODE_NUNITS (mode);
28867 gcc_assert (length >= nelt_mode);
28868 /* Skip if it isn't profitable. */
28869 if (!arm_block_set_vect_profit_p (length, align, mode))
28870 return false;
28872 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28874 v = sext_hwi (v, BITS_PER_WORD);
28875 val_elt = GEN_INT (v);
28876 for (j = 0; j < nelt_mode; j++)
28877 rval[j] = val_elt;
28879 reg = gen_reg_rtx (mode);
28880 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28881 /* Emit instruction loading the constant value. */
28882 emit_move_insn (reg, val_vec);
28884 i = 0;
28885 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28886 if (mode == V16QImode)
28888 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28889 emit_insn (gen_movmisalignv16qi (mem, reg));
28890 i += nelt_mode;
28891 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28892 if (i + nelt_v8 < length && i + nelt_v16 > length)
28894 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28895 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28896 /* We are shifting bytes back, set the alignment accordingly. */
28897 if ((length & 0x3) == 0)
28898 set_mem_align (mem, BITS_PER_UNIT * 4);
28899 else if ((length & 0x1) == 0)
28900 set_mem_align (mem, BITS_PER_UNIT * 2);
28901 else
28902 set_mem_align (mem, BITS_PER_UNIT);
28904 emit_insn (gen_movmisalignv16qi (mem, reg));
28905 return true;
28907 /* Fall through for bytes leftover. */
28908 mode = V8QImode;
28909 nelt_mode = GET_MODE_NUNITS (mode);
28910 reg = gen_lowpart (V8QImode, reg);
28913 /* Handle 8 bytes in a vector. */
28914 for (; (i + nelt_mode <= length); i += nelt_mode)
28916 addr = plus_constant (Pmode, dst, i);
28917 mem = adjust_automodify_address (dstbase, mode, addr, i);
28918 emit_move_insn (mem, reg);
28921 /* Handle single word leftover by shifting 4 bytes back. We can
28922 use aligned access for this case. */
28923 if (i + UNITS_PER_WORD == length)
28925 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28926 mem = adjust_automodify_address (dstbase, mode,
28927 addr, i - UNITS_PER_WORD);
28928 /* We are shifting 4 bytes back, set the alignment accordingly. */
28929 if (align > UNITS_PER_WORD)
28930 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28932 emit_move_insn (mem, reg);
28934 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28935 We have to use unaligned access for this case. */
28936 else if (i < length)
28938 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28939 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28940 /* We are shifting bytes back, set the alignment accordingly. */
28941 if ((length & 1) == 0)
28942 set_mem_align (mem, BITS_PER_UNIT * 2);
28943 else
28944 set_mem_align (mem, BITS_PER_UNIT);
28946 emit_insn (gen_movmisalignv8qi (mem, reg));
28949 return true;
28952 /* Set a block of memory using plain strh/strb instructions, only
28953 using instructions allowed by ALIGN on the processor. We fill the
28954 first LENGTH bytes of the memory area starting from DSTBASE
28955 with byte constant VALUE. ALIGN is the alignment requirement
28956 of memory. */
28957 static bool
28958 arm_block_set_unaligned_non_vect (rtx dstbase,
28959 unsigned HOST_WIDE_INT length,
28960 unsigned HOST_WIDE_INT value,
28961 unsigned HOST_WIDE_INT align)
28963 unsigned int i;
28964 rtx dst, addr, mem;
28965 rtx val_exp, val_reg, reg;
28966 machine_mode mode;
28967 HOST_WIDE_INT v = value;
28969 gcc_assert (align == 1 || align == 2);
28971 if (align == 2)
28972 v |= (value << BITS_PER_UNIT);
28974 v = sext_hwi (v, BITS_PER_WORD);
28975 val_exp = GEN_INT (v);
28976 /* Skip if it isn't profitable. */
28977 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28978 align, true, false))
28979 return false;
28981 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28982 mode = (align == 2 ? HImode : QImode);
28983 val_reg = force_reg (SImode, val_exp);
28984 reg = gen_lowpart (mode, val_reg);
28986 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28988 addr = plus_constant (Pmode, dst, i);
28989 mem = adjust_automodify_address (dstbase, mode, addr, i);
28990 emit_move_insn (mem, reg);
28993 /* Handle single byte leftover. */
28994 if (i + 1 == length)
28996 reg = gen_lowpart (QImode, val_reg);
28997 addr = plus_constant (Pmode, dst, i);
28998 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28999 emit_move_insn (mem, reg);
29000 i++;
29003 gcc_assert (i == length);
29004 return true;
29007 /* Set a block of memory using plain strd/str/strh/strb instructions,
29008 to permit unaligned stores on processors which support unaligned
29009 semantics for those instructions. We fill the first LENGTH bytes
29010 of the memory area starting from DSTBASE with byte constant VALUE.
29011 ALIGN is the alignment requirement of memory. */
29012 static bool
29013 arm_block_set_aligned_non_vect (rtx dstbase,
29014 unsigned HOST_WIDE_INT length,
29015 unsigned HOST_WIDE_INT value,
29016 unsigned HOST_WIDE_INT align)
29018 unsigned int i;
29019 rtx dst, addr, mem;
29020 rtx val_exp, val_reg, reg;
29021 unsigned HOST_WIDE_INT v;
29022 bool use_strd_p;
29024 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29025 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29027 v = (value | (value << 8) | (value << 16) | (value << 24));
29028 if (length < UNITS_PER_WORD)
29029 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29031 if (use_strd_p)
29032 v |= (v << BITS_PER_WORD);
29033 else
29034 v = sext_hwi (v, BITS_PER_WORD);
29036 val_exp = GEN_INT (v);
29037 /* Skip if it isn't profitable. */
29038 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29039 align, false, use_strd_p))
29041 if (!use_strd_p)
29042 return false;
29044 /* Try without strd. */
29045 v = (v >> BITS_PER_WORD);
29046 v = sext_hwi (v, BITS_PER_WORD);
29047 val_exp = GEN_INT (v);
29048 use_strd_p = false;
29049 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29050 align, false, use_strd_p))
29051 return false;
29054 i = 0;
29055 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29056 /* Handle double words using strd if possible. */
29057 if (use_strd_p)
29059 val_reg = force_reg (DImode, val_exp);
29060 reg = val_reg;
29061 for (; (i + 8 <= length); i += 8)
29063 addr = plus_constant (Pmode, dst, i);
29064 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29065 emit_move_insn (mem, reg);
29068 else
29069 val_reg = force_reg (SImode, val_exp);
29071 /* Handle words. */
29072 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29073 for (; (i + 4 <= length); i += 4)
29075 addr = plus_constant (Pmode, dst, i);
29076 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29077 if ((align & 3) == 0)
29078 emit_move_insn (mem, reg);
29079 else
29080 emit_insn (gen_unaligned_storesi (mem, reg));
29083 /* Merge last pair of STRH and STRB into a STR if possible. */
29084 if (unaligned_access && i > 0 && (i + 3) == length)
29086 addr = plus_constant (Pmode, dst, i - 1);
29087 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29088 /* We are shifting one byte back, set the alignment accordingly. */
29089 if ((align & 1) == 0)
29090 set_mem_align (mem, BITS_PER_UNIT);
29092 /* Most likely this is an unaligned access, and we can't tell at
29093 compilation time. */
29094 emit_insn (gen_unaligned_storesi (mem, reg));
29095 return true;
29098 /* Handle half word leftover. */
29099 if (i + 2 <= length)
29101 reg = gen_lowpart (HImode, val_reg);
29102 addr = plus_constant (Pmode, dst, i);
29103 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29104 if ((align & 1) == 0)
29105 emit_move_insn (mem, reg);
29106 else
29107 emit_insn (gen_unaligned_storehi (mem, reg));
29109 i += 2;
29112 /* Handle single byte leftover. */
29113 if (i + 1 == length)
29115 reg = gen_lowpart (QImode, val_reg);
29116 addr = plus_constant (Pmode, dst, i);
29117 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29118 emit_move_insn (mem, reg);
29121 return true;
29124 /* Set a block of memory using vectorization instructions for both
29125 aligned and unaligned cases. We fill the first LENGTH bytes of
29126 the memory area starting from DSTBASE with byte constant VALUE.
29127 ALIGN is the alignment requirement of memory. */
29128 static bool
29129 arm_block_set_vect (rtx dstbase,
29130 unsigned HOST_WIDE_INT length,
29131 unsigned HOST_WIDE_INT value,
29132 unsigned HOST_WIDE_INT align)
29134 /* Check whether we need to use unaligned store instruction. */
29135 if (((align & 3) != 0 || (length & 3) != 0)
29136 /* Check whether unaligned store instruction is available. */
29137 && (!unaligned_access || BYTES_BIG_ENDIAN))
29138 return false;
29140 if ((align & 3) == 0)
29141 return arm_block_set_aligned_vect (dstbase, length, value, align);
29142 else
29143 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29146 /* Expand a string store (memset) operation. First we try to do it
29147 using vectorization instructions, then with ARM unaligned access and
29148 double-word stores if profitable. OPERANDS[0] is the destination,
29149 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29150 initialize the memory with, OPERANDS[3] is the known alignment of
29151 the destination. */
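/* For example, a memset of 16 bytes with value 0 and a word-aligned
   destination will normally be handled by arm_block_set_vect when NEON is
   available and the tuning prefers it, and otherwise by
   arm_block_set_aligned_non_vect using str (or strd) stores.  */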
29152 bool
29153 arm_gen_setmem (rtx *operands)
29155 rtx dstbase = operands[0];
29156 unsigned HOST_WIDE_INT length;
29157 unsigned HOST_WIDE_INT value;
29158 unsigned HOST_WIDE_INT align;
29160 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29161 return false;
29163 length = UINTVAL (operands[1]);
29164 if (length > 64)
29165 return false;
29167 value = (UINTVAL (operands[2]) & 0xFF);
29168 align = UINTVAL (operands[3]);
29169 if (TARGET_NEON && length >= 8
29170 && current_tune->string_ops_prefer_neon
29171 && arm_block_set_vect (dstbase, length, value, align))
29172 return true;
29174 if (!unaligned_access && (align & 3) != 0)
29175 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29177 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29180 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
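/* The offset returned below is 1 << 29 == 0x20000000; with the usual
   AddressSanitizer shadow scale of 8 this corresponds to a shadow mapping
   of roughly (A >> 3) + 0x20000000 for an address A.  */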
29182 static unsigned HOST_WIDE_INT
29183 arm_asan_shadow_offset (void)
29185 return (unsigned HOST_WIDE_INT) 1 << 29;
29189 /* This is a temporary fix for PR60655. Ideally we need
29190 to handle most of these cases in the generic part but
29191 currently we reject minus (..) (sym_ref). We try to
29192 ameliorate the case with minus (sym_ref1) (sym_ref2)
29193 where they are in the same section. */
29195 static bool
29196 arm_const_not_ok_for_debug_p (rtx p)
29198 tree decl_op0 = NULL;
29199 tree decl_op1 = NULL;
29201 if (GET_CODE (p) == MINUS)
29203 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29205 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29206 if (decl_op1
29207 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29208 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29210 if ((TREE_CODE (decl_op1) == VAR_DECL
29211 || TREE_CODE (decl_op1) == CONST_DECL)
29212 && (TREE_CODE (decl_op0) == VAR_DECL
29213 || TREE_CODE (decl_op0) == CONST_DECL))
29214 return (get_variable_section (decl_op1, false)
29215 != get_variable_section (decl_op0, false));
29217 if (TREE_CODE (decl_op1) == LABEL_DECL
29218 && TREE_CODE (decl_op0) == LABEL_DECL)
29219 return (DECL_CONTEXT (decl_op1)
29220 != DECL_CONTEXT (decl_op0));
29223 return true;
29227 return false;
29230 /* Return TRUE if X is a reference to a value in a constant pool. */
29231 extern bool
29232 arm_is_constant_pool_ref (rtx x)
29234 return (MEM_P (x)
29235 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29236 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29239 /* If MEM is in the form [base+offset], extract the two parts of the
29240 address and store them in BASE and OFFSET; otherwise return false
29241 after clearing BASE and OFFSET. */
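/* For example, (mem (plus (reg r4) (const_int 8))) gives BASE == r4 and
   OFFSET == 8, while a bare (mem (reg r4)) gives OFFSET == const0_rtx.  */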
29243 static bool
29244 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29246 rtx addr;
29248 gcc_assert (MEM_P (mem));
29250 addr = XEXP (mem, 0);
29252 /* Strip off const from addresses like (const (addr)). */
29253 if (GET_CODE (addr) == CONST)
29254 addr = XEXP (addr, 0);
29256 if (GET_CODE (addr) == REG)
29258 *base = addr;
29259 *offset = const0_rtx;
29260 return true;
29263 if (GET_CODE (addr) == PLUS
29264 && GET_CODE (XEXP (addr, 0)) == REG
29265 && CONST_INT_P (XEXP (addr, 1)))
29267 *base = XEXP (addr, 0);
29268 *offset = XEXP (addr, 1);
29269 return true;
29272 *base = NULL_RTX;
29273 *offset = NULL_RTX;
29275 return false;
29278 /* If INSN is a load or store whose address is in the form [base+offset],
29279 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29280 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29281 otherwise return FALSE. */
29283 static bool
29284 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29286 rtx x, dest, src;
29288 gcc_assert (INSN_P (insn));
29289 x = PATTERN (insn);
29290 if (GET_CODE (x) != SET)
29291 return false;
29293 src = SET_SRC (x);
29294 dest = SET_DEST (x);
29295 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29297 *is_load = false;
29298 extract_base_offset_in_addr (dest, base, offset);
29300 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29302 *is_load = true;
29303 extract_base_offset_in_addr (src, base, offset);
29305 else
29306 return false;
29308 return (*base != NULL_RTX && *offset != NULL_RTX);
29311 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29313 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29314 and PRI are only calculated for these instructions. For other instructions,
29315 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
29316 of instruction fusion can be supported by returning different priorities.
29318 It's important that irrelevant instructions get the largest FUSION_PRI. */
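/* For example, two loads ldr r0, [r4, #4] and ldr r1, [r4, #8] both get
   FUSION_PRI == max_pri - 2, while their PRI values differ by the offset,
   so the access at the smaller offset is scheduled first.  */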
29320 static void
29321 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29322 int *fusion_pri, int *pri)
29324 int tmp, off_val;
29325 bool is_load;
29326 rtx base, offset;
29328 gcc_assert (INSN_P (insn));
29330 tmp = max_pri - 1;
29331 if (!fusion_load_store (insn, &base, &offset, &is_load))
29333 *pri = tmp;
29334 *fusion_pri = tmp;
29335 return;
29338 /* Load goes first. */
29339 if (is_load)
29340 *fusion_pri = tmp - 1;
29341 else
29342 *fusion_pri = tmp - 2;
29344 tmp /= 2;
29346 /* INSN with smaller base register goes first. */
29347 tmp -= ((REGNO (base) & 0xff) << 20);
29349 /* INSN with smaller offset goes first. */
29350 off_val = (int)(INTVAL (offset));
29351 if (off_val >= 0)
29352 tmp -= (off_val & 0xfffff);
29353 else
29354 tmp += ((- off_val) & 0xfffff);
29356 *pri = tmp;
29357 return;
29359 #include "gt-arm.h"