2014-12-02 Tom de Vries <tom@codesourcery.com>
[official-gcc.git] / gcc / config / arm / arm.c
blobf3be6cfc7f4092098734f228169c51704ea94103
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
95 int i[4];
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
154 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
155 #endif
156 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
157 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
158 static int arm_comp_type_attributes (const_tree, const_tree);
159 static void arm_set_default_type_attributes (tree);
160 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
161 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
162 static int optimal_immediate_sequence (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence);
165 static int optimal_immediate_sequence_1 (enum rtx_code code,
166 unsigned HOST_WIDE_INT val,
167 struct four_ints *return_sequence,
168 int i);
169 static int arm_get_strip_length (int);
170 static bool arm_function_ok_for_sibcall (tree, tree);
171 static machine_mode arm_promote_function_mode (const_tree,
172 machine_mode, int *,
173 const_tree, int);
174 static bool arm_return_in_memory (const_tree, const_tree);
175 static rtx arm_function_value (const_tree, const_tree, bool);
176 static rtx arm_libcall_value_1 (machine_mode);
177 static rtx arm_libcall_value (machine_mode, const_rtx);
178 static bool arm_function_value_regno_p (const unsigned int);
179 static void arm_internal_label (FILE *, const char *, unsigned long);
180 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
181 tree);
182 static bool arm_have_conditional_execution (void);
183 static bool arm_cannot_force_const_mem (machine_mode, rtx);
184 static bool arm_legitimate_constant_p (machine_mode, rtx);
185 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
186 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
187 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
192 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
193 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
194 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
195 static void emit_constant_insn (rtx cond, rtx pattern);
196 static rtx_insn *emit_set_insn (rtx, rtx);
197 static rtx emit_multi_reg_push (unsigned long, unsigned long);
198 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
199 tree, bool);
200 static rtx arm_function_arg (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
205 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
206 const_tree);
207 static rtx aapcs_libcall_value (machine_mode);
208 static int aapcs_select_return_coproc (const_tree, const_tree);
210 #ifdef OBJECT_FORMAT_ELF
211 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
212 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
213 #endif
214 #ifndef ARM_PE
215 static void arm_encode_section_info (tree, rtx, int);
216 #endif
218 static void arm_file_end (void);
219 static void arm_file_start (void);
221 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static bool arm_pass_by_reference (cumulative_args_t,
224 machine_mode, const_tree, bool);
225 static bool arm_promote_prototypes (const_tree);
226 static bool arm_default_short_enums (void);
227 static bool arm_align_anon_bitfield (void);
228 static bool arm_return_in_msb (const_tree);
229 static bool arm_must_pass_in_stack (machine_mode, const_tree);
230 static bool arm_return_in_memory (const_tree, const_tree);
231 #if ARM_UNWIND_INFO
232 static void arm_unwind_emit (FILE *, rtx_insn *);
233 static bool arm_output_ttype (rtx);
234 static void arm_asm_emit_except_personality (rtx);
235 static void arm_asm_init_sections (void);
236 #endif
237 static rtx arm_dwarf_register_span (rtx);
239 static tree arm_cxx_guard_type (void);
240 static bool arm_cxx_guard_mask_bit (void);
241 static tree arm_get_cookie_size (tree);
242 static bool arm_cookie_has_size (void);
243 static bool arm_cxx_cdtor_returns_this (void);
244 static bool arm_cxx_key_method_may_be_inline (void);
245 static void arm_cxx_determine_class_data_visibility (tree);
246 static bool arm_cxx_class_data_always_comdat (void);
247 static bool arm_cxx_use_aeabi_atexit (void);
248 static void arm_init_libfuncs (void);
249 static tree arm_build_builtin_va_list (void);
250 static void arm_expand_builtin_va_start (tree, rtx);
251 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
252 static void arm_option_override (void);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_cannot_copy_insn_p (rtx_insn *);
255 static int arm_issue_rate (void);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
292 const unsigned char *sel);
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 /* Table of machine attributes. */
310 static const struct attribute_spec arm_attribute_table[] =
312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
313 affects_type_identity } */
314 /* Function calls made to this symbol must be done indirectly, because
315 it may lie outside of the 26 bit addressing range of a normal function
316 call. */
317 { "long_call", 0, 0, false, true, true, NULL, false },
318 /* Whereas these functions are always known to reside within the 26 bit
319 addressing range. */
320 { "short_call", 0, 0, false, true, true, NULL, false },
321 /* Specify the procedure call conventions for a function. */
322 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
323 false },
324 /* Interrupt Service Routines have special prologue and epilogue requirements. */
325 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
326 false },
327 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
329 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #ifdef ARM_PE
332 /* ARM/PE has three new attributes:
333 interfacearm - ?
334 dllexport - for exporting a function/variable that will live in a dll
335 dllimport - for importing a function/variable from a dll
337 Microsoft allows multiple declspecs in one __declspec, separating
338 them with spaces. We do NOT support this. Instead, use __declspec
339 multiple times.
341 { "dllimport", 0, 0, true, false, false, NULL, false },
342 { "dllexport", 0, 0, true, false, false, NULL, false },
343 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
344 false },
345 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
349 false },
350 #endif
351 { NULL, 0, 0, false, false, false, NULL, false }
354 /* Initialize the GCC target structure. */
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 #undef TARGET_MERGE_DECL_ATTRIBUTES
357 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
358 #endif
360 #undef TARGET_LEGITIMIZE_ADDRESS
361 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
363 #undef TARGET_LRA_P
364 #define TARGET_LRA_P arm_lra_p
366 #undef TARGET_ATTRIBUTE_TABLE
367 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START arm_file_start
371 #undef TARGET_ASM_FILE_END
372 #define TARGET_ASM_FILE_END arm_file_end
374 #undef TARGET_ASM_ALIGNED_SI_OP
375 #define TARGET_ASM_ALIGNED_SI_OP NULL
376 #undef TARGET_ASM_INTEGER
377 #define TARGET_ASM_INTEGER arm_assemble_integer
379 #undef TARGET_PRINT_OPERAND
380 #define TARGET_PRINT_OPERAND arm_print_operand
381 #undef TARGET_PRINT_OPERAND_ADDRESS
382 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
386 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
387 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
389 #undef TARGET_ASM_FUNCTION_PROLOGUE
390 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
392 #undef TARGET_ASM_FUNCTION_EPILOGUE
393 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE arm_option_override
398 #undef TARGET_COMP_TYPE_ATTRIBUTES
399 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
401 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
402 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
407 #undef TARGET_SCHED_REORDER
408 #define TARGET_SCHED_REORDER arm_sched_reorder
410 #undef TARGET_REGISTER_MOVE_COST
411 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
413 #undef TARGET_MEMORY_MOVE_COST
414 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
416 #undef TARGET_ENCODE_SECTION_INFO
417 #ifdef ARM_PE
418 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
419 #else
420 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
421 #endif
423 #undef TARGET_STRIP_NAME_ENCODING
424 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
426 #undef TARGET_ASM_INTERNAL_LABEL
427 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
429 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
430 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
432 #undef TARGET_FUNCTION_VALUE
433 #define TARGET_FUNCTION_VALUE arm_function_value
435 #undef TARGET_LIBCALL_VALUE
436 #define TARGET_LIBCALL_VALUE arm_libcall_value
438 #undef TARGET_FUNCTION_VALUE_REGNO_P
439 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
441 #undef TARGET_ASM_OUTPUT_MI_THUNK
442 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
443 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
444 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
446 #undef TARGET_RTX_COSTS
447 #define TARGET_RTX_COSTS arm_rtx_costs
448 #undef TARGET_ADDRESS_COST
449 #define TARGET_ADDRESS_COST arm_address_cost
451 #undef TARGET_SHIFT_TRUNCATION_MASK
452 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
455 #undef TARGET_ARRAY_MODE_SUPPORTED_P
456 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
457 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
458 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
459 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
460 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
461 arm_autovectorize_vector_sizes
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
466 #undef TARGET_INIT_BUILTINS
467 #define TARGET_INIT_BUILTINS arm_init_builtins
468 #undef TARGET_EXPAND_BUILTIN
469 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL arm_builtin_decl
473 #undef TARGET_INIT_LIBFUNCS
474 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
476 #undef TARGET_PROMOTE_FUNCTION_MODE
477 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
478 #undef TARGET_PROMOTE_PROTOTYPES
479 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
480 #undef TARGET_PASS_BY_REFERENCE
481 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
482 #undef TARGET_ARG_PARTIAL_BYTES
483 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
484 #undef TARGET_FUNCTION_ARG
485 #define TARGET_FUNCTION_ARG arm_function_arg
486 #undef TARGET_FUNCTION_ARG_ADVANCE
487 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
488 #undef TARGET_FUNCTION_ARG_BOUNDARY
489 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
491 #undef TARGET_SETUP_INCOMING_VARARGS
492 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
494 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
495 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
497 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
498 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
499 #undef TARGET_TRAMPOLINE_INIT
500 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
501 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
502 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
504 #undef TARGET_WARN_FUNC_RETURN
505 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
507 #undef TARGET_DEFAULT_SHORT_ENUMS
508 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
510 #undef TARGET_ALIGN_ANON_BITFIELD
511 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
513 #undef TARGET_NARROW_VOLATILE_BITFIELD
514 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
516 #undef TARGET_CXX_GUARD_TYPE
517 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
519 #undef TARGET_CXX_GUARD_MASK_BIT
520 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
522 #undef TARGET_CXX_GET_COOKIE_SIZE
523 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
525 #undef TARGET_CXX_COOKIE_HAS_SIZE
526 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
528 #undef TARGET_CXX_CDTOR_RETURNS_THIS
529 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
531 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
532 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
534 #undef TARGET_CXX_USE_AEABI_ATEXIT
535 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
537 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
538 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
539 arm_cxx_determine_class_data_visibility
541 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
542 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
544 #undef TARGET_RETURN_IN_MSB
545 #define TARGET_RETURN_IN_MSB arm_return_in_msb
547 #undef TARGET_RETURN_IN_MEMORY
548 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
550 #undef TARGET_MUST_PASS_IN_STACK
551 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
553 #if ARM_UNWIND_INFO
554 #undef TARGET_ASM_UNWIND_EMIT
555 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
557 /* EABI unwinding tables use a different format for the typeinfo tables. */
558 #undef TARGET_ASM_TTYPE
559 #define TARGET_ASM_TTYPE arm_output_ttype
561 #undef TARGET_ARM_EABI_UNWINDER
562 #define TARGET_ARM_EABI_UNWINDER true
564 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
565 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
567 #undef TARGET_ASM_INIT_SECTIONS
568 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
569 #endif /* ARM_UNWIND_INFO */
571 #undef TARGET_DWARF_REGISTER_SPAN
572 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
574 #undef TARGET_CANNOT_COPY_INSN_P
575 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
577 #ifdef HAVE_AS_TLS
578 #undef TARGET_HAVE_TLS
579 #define TARGET_HAVE_TLS true
580 #endif
582 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
583 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
588 #undef TARGET_CANNOT_FORCE_CONST_MEM
589 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
591 #undef TARGET_MAX_ANCHOR_OFFSET
592 #define TARGET_MAX_ANCHOR_OFFSET 4095
594 /* The minimum is set such that the total size of the block
595 for a particular anchor is -4088 + 1 + 4095 bytes, which is
596 divisible by eight, ensuring natural spacing of anchors. */
597 #undef TARGET_MIN_ANCHOR_OFFSET
598 #define TARGET_MIN_ANCHOR_OFFSET -4088
600 #undef TARGET_SCHED_ISSUE_RATE
601 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE arm_mangle_type
606 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
607 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
609 #undef TARGET_BUILD_BUILTIN_VA_LIST
610 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
611 #undef TARGET_EXPAND_BUILTIN_VA_START
612 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
613 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
614 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
616 #ifdef HAVE_AS_TLS
617 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
618 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
619 #endif
621 #undef TARGET_LEGITIMATE_ADDRESS_P
622 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
624 #undef TARGET_PREFERRED_RELOAD_CLASS
625 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
627 #undef TARGET_INVALID_PARAMETER_TYPE
628 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
630 #undef TARGET_INVALID_RETURN_TYPE
631 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
633 #undef TARGET_PROMOTED_TYPE
634 #define TARGET_PROMOTED_TYPE arm_promoted_type
636 #undef TARGET_CONVERT_TO_TYPE
637 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
640 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
642 #undef TARGET_FRAME_POINTER_REQUIRED
643 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE arm_can_eliminate
648 #undef TARGET_CONDITIONAL_REGISTER_USAGE
649 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
651 #undef TARGET_CLASS_LIKELY_SPILLED_P
652 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
654 #undef TARGET_VECTORIZE_BUILTINS
655 #define TARGET_VECTORIZE_BUILTINS
657 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
658 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
659 arm_builtin_vectorized_function
661 #undef TARGET_VECTOR_ALIGNMENT
662 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
664 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
665 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
666 arm_vector_alignment_reachable
668 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
669 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
670 arm_builtin_support_vector_misalignment
672 #undef TARGET_PREFERRED_RENAME_CLASS
673 #define TARGET_PREFERRED_RENAME_CLASS \
674 arm_preferred_rename_class
676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
678 arm_vectorize_vec_perm_const_ok
680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
682 arm_builtin_vectorization_cost
683 #undef TARGET_VECTORIZE_ADD_STMT_COST
684 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
686 #undef TARGET_CANONICALIZE_COMPARISON
687 #define TARGET_CANONICALIZE_COMPARISON \
688 arm_canonicalize_comparison
690 #undef TARGET_ASAN_SHADOW_OFFSET
691 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
693 #undef MAX_INSN_PER_IT_BLOCK
694 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
696 #undef TARGET_CAN_USE_DOLOOP_P
697 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
699 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
700 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
702 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
703 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
705 #undef TARGET_SCHED_FUSION_PRIORITY
706 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
708 struct gcc_target targetm = TARGET_INITIALIZER;
710 /* Obstack for minipool constant handling. */
711 static struct obstack minipool_obstack;
712 static char * minipool_startobj;
714 /* The maximum number of insns skipped which
715 will be conditionalised if possible. */
716 static int max_insns_skipped = 5;
718 extern FILE * asm_out_file;
720 /* True if we are currently building a constant table. */
721 int making_const_table;
723 /* The processor for which instructions should be scheduled. */
724 enum processor_type arm_tune = arm_none;
726 /* The current tuning set. */
727 const struct tune_params *current_tune;
729 /* Which floating point hardware to schedule for. */
730 int arm_fpu_attr;
732 /* Which floating popint hardware to use. */
733 const struct arm_fpu_desc *arm_fpu_desc;
735 /* Used for Thumb call_via trampolines. */
736 rtx thumb_call_via_label[14];
737 static int thumb_call_reg_needed;
739 /* The bits in this mask specify which
740 instructions we are allowed to generate. */
741 unsigned long insn_flags = 0;
743 /* The bits in this mask specify which instruction scheduling options should
744 be used. */
745 unsigned long tune_flags = 0;
747 /* The highest ARM architecture version supported by the
748 target. */
749 enum base_architecture arm_base_arch = BASE_ARCH_0;
751 /* The following are used in the arm.md file as equivalents to bits
752 in the above two flag variables. */
754 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
755 int arm_arch3m = 0;
757 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
758 int arm_arch4 = 0;
760 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
761 int arm_arch4t = 0;
763 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
764 int arm_arch5 = 0;
766 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
767 int arm_arch5e = 0;
769 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
770 int arm_arch6 = 0;
772 /* Nonzero if this chip supports the ARM 6K extensions. */
773 int arm_arch6k = 0;
775 /* Nonzero if instructions present in ARMv6-M can be used. */
776 int arm_arch6m = 0;
778 /* Nonzero if this chip supports the ARM 7 extensions. */
779 int arm_arch7 = 0;
781 /* Nonzero if instructions not present in the 'M' profile can be used. */
782 int arm_arch_notm = 0;
784 /* Nonzero if instructions present in ARMv7E-M can be used. */
785 int arm_arch7em = 0;
787 /* Nonzero if instructions present in ARMv8 can be used. */
788 int arm_arch8 = 0;
790 /* Nonzero if this chip can benefit from load scheduling. */
791 int arm_ld_sched = 0;
793 /* Nonzero if this chip is a StrongARM. */
794 int arm_tune_strongarm = 0;
796 /* Nonzero if this chip supports Intel Wireless MMX technology. */
797 int arm_arch_iwmmxt = 0;
799 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
800 int arm_arch_iwmmxt2 = 0;
802 /* Nonzero if this chip is an XScale. */
803 int arm_arch_xscale = 0;
805 /* Nonzero if tuning for XScale */
806 int arm_tune_xscale = 0;
808 /* Nonzero if we want to tune for stores that access the write-buffer.
809 This typically means an ARM6 or ARM7 with MMU or MPU. */
810 int arm_tune_wbuf = 0;
812 /* Nonzero if tuning for Cortex-A9. */
813 int arm_tune_cortex_a9 = 0;
815 /* Nonzero if generating Thumb instructions. */
816 int thumb_code = 0;
818 /* Nonzero if generating Thumb-1 instructions. */
819 int thumb1_code = 0;
821 /* Nonzero if we should define __THUMB_INTERWORK__ in the
822 preprocessor.
823 XXX This is a bit of a hack, it's intended to help work around
824 problems in GLD which doesn't understand that armv5t code is
825 interworking clean. */
826 int arm_cpp_interwork = 0;
828 /* Nonzero if chip supports Thumb 2. */
829 int arm_arch_thumb2;
831 /* Nonzero if chip supports integer division instruction. */
832 int arm_arch_arm_hwdiv;
833 int arm_arch_thumb_hwdiv;
835 /* Nonzero if we should use Neon to handle 64-bits operations rather
836 than core registers. */
837 int prefer_neon_for_64bits = 0;
839 /* Nonzero if we shouldn't use literal pools. */
840 bool arm_disable_literal_pool = false;
842 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
843 we must report the mode of the memory reference from
844 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
845 machine_mode output_memory_reference_mode;
847 /* The register number to be used for the PIC offset register. */
848 unsigned arm_pic_register = INVALID_REGNUM;
850 enum arm_pcs arm_pcs_default;
852 /* For an explanation of these variables, see final_prescan_insn below. */
853 int arm_ccfsm_state;
854 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
855 enum arm_cond_code arm_current_cc;
857 rtx arm_target_insn;
858 int arm_target_label;
859 /* The number of conditionally executed insns, including the current insn. */
860 int arm_condexec_count = 0;
861 /* A bitmask specifying the patterns for the IT block.
862 Zero means do not output an IT block before this insn. */
863 int arm_condexec_mask = 0;
864 /* The number of bits used in arm_condexec_mask. */
865 int arm_condexec_masklen = 0;
867 /* Nonzero if chip supports the ARMv8 CRC instructions. */
868 int arm_arch_crc = 0;
870 /* Nonzero if the core has a very small, high-latency, multiply unit. */
871 int arm_m_profile_small_mul = 0;
873 /* The condition codes of the ARM, and the inverse function. */
874 static const char * const arm_condition_codes[] =
876 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
877 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
880 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
881 int arm_regs_in_sequence[] =
883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
886 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
887 #define streq(string1, string2) (strcmp (string1, string2) == 0)
889 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
890 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
891 | (1 << PIC_OFFSET_TABLE_REGNUM)))
893 /* Initialization code. */
895 struct processors
897 const char *const name;
898 enum processor_type core;
899 const char *arch;
900 enum base_architecture base_arch;
901 const unsigned long flags;
902 const struct tune_params *const tune;
906 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
907 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
908 prefetch_slots, \
909 l1_size, \
910 l1_line_size
912 /* arm generic vectorizer costs. */
913 static const
914 struct cpu_vec_costs arm_default_vec_cost = {
915 1, /* scalar_stmt_cost. */
916 1, /* scalar load_cost. */
917 1, /* scalar_store_cost. */
918 1, /* vec_stmt_cost. */
919 1, /* vec_to_scalar_cost. */
920 1, /* scalar_to_vec_cost. */
921 1, /* vec_align_load_cost. */
922 1, /* vec_unalign_load_cost. */
923 1, /* vec_unalign_store_cost. */
924 1, /* vec_store_cost. */
925 3, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
929 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
930 #include "aarch-cost-tables.h"
934 const struct cpu_cost_table cortexa9_extra_costs =
936 /* ALU */
938 0, /* arith. */
939 0, /* logical. */
940 0, /* shift. */
941 COSTS_N_INSNS (1), /* shift_reg. */
942 COSTS_N_INSNS (1), /* arith_shift. */
943 COSTS_N_INSNS (2), /* arith_shift_reg. */
944 0, /* log_shift. */
945 COSTS_N_INSNS (1), /* log_shift_reg. */
946 COSTS_N_INSNS (1), /* extend. */
947 COSTS_N_INSNS (2), /* extend_arith. */
948 COSTS_N_INSNS (1), /* bfi. */
949 COSTS_N_INSNS (1), /* bfx. */
950 0, /* clz. */
951 0, /* rev. */
952 0, /* non_exec. */
953 true /* non_exec_costs_exec. */
956 /* MULT SImode */
958 COSTS_N_INSNS (3), /* simple. */
959 COSTS_N_INSNS (3), /* flag_setting. */
960 COSTS_N_INSNS (2), /* extend. */
961 COSTS_N_INSNS (3), /* add. */
962 COSTS_N_INSNS (2), /* extend_add. */
963 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
965 /* MULT DImode */
967 0, /* simple (N/A). */
968 0, /* flag_setting (N/A). */
969 COSTS_N_INSNS (4), /* extend. */
970 0, /* add (N/A). */
971 COSTS_N_INSNS (4), /* extend_add. */
972 0 /* idiv (N/A). */
975 /* LD/ST */
977 COSTS_N_INSNS (2), /* load. */
978 COSTS_N_INSNS (2), /* load_sign_extend. */
979 COSTS_N_INSNS (2), /* ldrd. */
980 COSTS_N_INSNS (2), /* ldm_1st. */
981 1, /* ldm_regs_per_insn_1st. */
982 2, /* ldm_regs_per_insn_subsequent. */
983 COSTS_N_INSNS (5), /* loadf. */
984 COSTS_N_INSNS (5), /* loadd. */
985 COSTS_N_INSNS (1), /* load_unaligned. */
986 COSTS_N_INSNS (2), /* store. */
987 COSTS_N_INSNS (2), /* strd. */
988 COSTS_N_INSNS (2), /* stm_1st. */
989 1, /* stm_regs_per_insn_1st. */
990 2, /* stm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (1), /* storef. */
992 COSTS_N_INSNS (1), /* stored. */
993 COSTS_N_INSNS (1) /* store_unaligned. */
996 /* FP SFmode */
998 COSTS_N_INSNS (14), /* div. */
999 COSTS_N_INSNS (4), /* mult. */
1000 COSTS_N_INSNS (7), /* mult_addsub. */
1001 COSTS_N_INSNS (30), /* fma. */
1002 COSTS_N_INSNS (3), /* addsub. */
1003 COSTS_N_INSNS (1), /* fpconst. */
1004 COSTS_N_INSNS (1), /* neg. */
1005 COSTS_N_INSNS (3), /* compare. */
1006 COSTS_N_INSNS (3), /* widen. */
1007 COSTS_N_INSNS (3), /* narrow. */
1008 COSTS_N_INSNS (3), /* toint. */
1009 COSTS_N_INSNS (3), /* fromint. */
1010 COSTS_N_INSNS (3) /* roundint. */
1012 /* FP DFmode */
1014 COSTS_N_INSNS (24), /* div. */
1015 COSTS_N_INSNS (5), /* mult. */
1016 COSTS_N_INSNS (8), /* mult_addsub. */
1017 COSTS_N_INSNS (30), /* fma. */
1018 COSTS_N_INSNS (3), /* addsub. */
1019 COSTS_N_INSNS (1), /* fpconst. */
1020 COSTS_N_INSNS (1), /* neg. */
1021 COSTS_N_INSNS (3), /* compare. */
1022 COSTS_N_INSNS (3), /* widen. */
1023 COSTS_N_INSNS (3), /* narrow. */
1024 COSTS_N_INSNS (3), /* toint. */
1025 COSTS_N_INSNS (3), /* fromint. */
1026 COSTS_N_INSNS (3) /* roundint. */
1029 /* Vector */
1031 COSTS_N_INSNS (1) /* alu. */
1035 const struct cpu_cost_table cortexa8_extra_costs =
1037 /* ALU */
1039 0, /* arith. */
1040 0, /* logical. */
1041 COSTS_N_INSNS (1), /* shift. */
1042 0, /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 0, /* arith_shift_reg. */
1045 COSTS_N_INSNS (1), /* log_shift. */
1046 0, /* log_shift_reg. */
1047 0, /* extend. */
1048 0, /* extend_arith. */
1049 0, /* bfi. */
1050 0, /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1057 /* MULT SImode */
1059 COSTS_N_INSNS (1), /* simple. */
1060 COSTS_N_INSNS (1), /* flag_setting. */
1061 COSTS_N_INSNS (1), /* extend. */
1062 COSTS_N_INSNS (1), /* add. */
1063 COSTS_N_INSNS (1), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1066 /* MULT DImode */
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (2), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (2), /* extend_add. */
1073 0 /* idiv (N/A). */
1076 /* LD/ST */
1078 COSTS_N_INSNS (1), /* load. */
1079 COSTS_N_INSNS (1), /* load_sign_extend. */
1080 COSTS_N_INSNS (1), /* ldrd. */
1081 COSTS_N_INSNS (1), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (1), /* loadf. */
1085 COSTS_N_INSNS (1), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (1), /* store. */
1088 COSTS_N_INSNS (1), /* strd. */
1089 COSTS_N_INSNS (1), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1) /* store_unaligned. */
1097 /* FP SFmode */
1099 COSTS_N_INSNS (36), /* div. */
1100 COSTS_N_INSNS (11), /* mult. */
1101 COSTS_N_INSNS (20), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (9), /* addsub. */
1104 COSTS_N_INSNS (3), /* fpconst. */
1105 COSTS_N_INSNS (3), /* neg. */
1106 COSTS_N_INSNS (6), /* compare. */
1107 COSTS_N_INSNS (4), /* widen. */
1108 COSTS_N_INSNS (4), /* narrow. */
1109 COSTS_N_INSNS (8), /* toint. */
1110 COSTS_N_INSNS (8), /* fromint. */
1111 COSTS_N_INSNS (8) /* roundint. */
1113 /* FP DFmode */
1115 COSTS_N_INSNS (64), /* div. */
1116 COSTS_N_INSNS (16), /* mult. */
1117 COSTS_N_INSNS (25), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (9), /* addsub. */
1120 COSTS_N_INSNS (3), /* fpconst. */
1121 COSTS_N_INSNS (3), /* neg. */
1122 COSTS_N_INSNS (6), /* compare. */
1123 COSTS_N_INSNS (6), /* widen. */
1124 COSTS_N_INSNS (6), /* narrow. */
1125 COSTS_N_INSNS (8), /* toint. */
1126 COSTS_N_INSNS (8), /* fromint. */
1127 COSTS_N_INSNS (8) /* roundint. */
1130 /* Vector */
1132 COSTS_N_INSNS (1) /* alu. */
1136 const struct cpu_cost_table cortexa5_extra_costs =
1138 /* ALU */
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 COSTS_N_INSNS (1), /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 COSTS_N_INSNS (1), /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 COSTS_N_INSNS (1), /* log_shift_reg. */
1148 COSTS_N_INSNS (1), /* extend. */
1149 COSTS_N_INSNS (1), /* extend_arith. */
1150 COSTS_N_INSNS (1), /* bfi. */
1151 COSTS_N_INSNS (1), /* bfx. */
1152 COSTS_N_INSNS (1), /* clz. */
1153 COSTS_N_INSNS (1), /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1159 /* MULT SImode */
1161 0, /* simple. */
1162 COSTS_N_INSNS (1), /* flag_setting. */
1163 COSTS_N_INSNS (1), /* extend. */
1164 COSTS_N_INSNS (1), /* add. */
1165 COSTS_N_INSNS (1), /* extend_add. */
1166 COSTS_N_INSNS (7) /* idiv. */
1168 /* MULT DImode */
1170 0, /* simple (N/A). */
1171 0, /* flag_setting (N/A). */
1172 COSTS_N_INSNS (1), /* extend. */
1173 0, /* add. */
1174 COSTS_N_INSNS (2), /* extend_add. */
1175 0 /* idiv (N/A). */
1178 /* LD/ST */
1180 COSTS_N_INSNS (1), /* load. */
1181 COSTS_N_INSNS (1), /* load_sign_extend. */
1182 COSTS_N_INSNS (6), /* ldrd. */
1183 COSTS_N_INSNS (1), /* ldm_1st. */
1184 1, /* ldm_regs_per_insn_1st. */
1185 2, /* ldm_regs_per_insn_subsequent. */
1186 COSTS_N_INSNS (2), /* loadf. */
1187 COSTS_N_INSNS (4), /* loadd. */
1188 COSTS_N_INSNS (1), /* load_unaligned. */
1189 COSTS_N_INSNS (1), /* store. */
1190 COSTS_N_INSNS (3), /* strd. */
1191 COSTS_N_INSNS (1), /* stm_1st. */
1192 1, /* stm_regs_per_insn_1st. */
1193 2, /* stm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* storef. */
1195 COSTS_N_INSNS (2), /* stored. */
1196 COSTS_N_INSNS (1) /* store_unaligned. */
1199 /* FP SFmode */
1201 COSTS_N_INSNS (15), /* div. */
1202 COSTS_N_INSNS (3), /* mult. */
1203 COSTS_N_INSNS (7), /* mult_addsub. */
1204 COSTS_N_INSNS (7), /* fma. */
1205 COSTS_N_INSNS (3), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (3), /* compare. */
1209 COSTS_N_INSNS (3), /* widen. */
1210 COSTS_N_INSNS (3), /* narrow. */
1211 COSTS_N_INSNS (3), /* toint. */
1212 COSTS_N_INSNS (3), /* fromint. */
1213 COSTS_N_INSNS (3) /* roundint. */
1215 /* FP DFmode */
1217 COSTS_N_INSNS (30), /* div. */
1218 COSTS_N_INSNS (6), /* mult. */
1219 COSTS_N_INSNS (10), /* mult_addsub. */
1220 COSTS_N_INSNS (7), /* fma. */
1221 COSTS_N_INSNS (3), /* addsub. */
1222 COSTS_N_INSNS (3), /* fpconst. */
1223 COSTS_N_INSNS (3), /* neg. */
1224 COSTS_N_INSNS (3), /* compare. */
1225 COSTS_N_INSNS (3), /* widen. */
1226 COSTS_N_INSNS (3), /* narrow. */
1227 COSTS_N_INSNS (3), /* toint. */
1228 COSTS_N_INSNS (3), /* fromint. */
1229 COSTS_N_INSNS (3) /* roundint. */
1232 /* Vector */
1234 COSTS_N_INSNS (1) /* alu. */
1239 const struct cpu_cost_table cortexa7_extra_costs =
1241 /* ALU */
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1262 /* MULT SImode */
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1271 /* MULT DImode */
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1281 /* LD/ST */
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (3), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (2), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1) /* store_unaligned. */
1302 /* FP SFmode */
1304 COSTS_N_INSNS (15), /* div. */
1305 COSTS_N_INSNS (3), /* mult. */
1306 COSTS_N_INSNS (7), /* mult_addsub. */
1307 COSTS_N_INSNS (7), /* fma. */
1308 COSTS_N_INSNS (3), /* addsub. */
1309 COSTS_N_INSNS (3), /* fpconst. */
1310 COSTS_N_INSNS (3), /* neg. */
1311 COSTS_N_INSNS (3), /* compare. */
1312 COSTS_N_INSNS (3), /* widen. */
1313 COSTS_N_INSNS (3), /* narrow. */
1314 COSTS_N_INSNS (3), /* toint. */
1315 COSTS_N_INSNS (3), /* fromint. */
1316 COSTS_N_INSNS (3) /* roundint. */
1318 /* FP DFmode */
1320 COSTS_N_INSNS (30), /* div. */
1321 COSTS_N_INSNS (6), /* mult. */
1322 COSTS_N_INSNS (10), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1335 /* Vector */
1337 COSTS_N_INSNS (1) /* alu. */
1341 const struct cpu_cost_table cortexa12_extra_costs =
1343 /* ALU */
1345 0, /* arith. */
1346 0, /* logical. */
1347 0, /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 0, /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 0, /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1362 /* MULT SImode */
1365 COSTS_N_INSNS (2), /* simple. */
1366 COSTS_N_INSNS (3), /* flag_setting. */
1367 COSTS_N_INSNS (2), /* extend. */
1368 COSTS_N_INSNS (3), /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 COSTS_N_INSNS (18) /* idiv. */
1372 /* MULT DImode */
1374 0, /* simple (N/A). */
1375 0, /* flag_setting (N/A). */
1376 COSTS_N_INSNS (3), /* extend. */
1377 0, /* add (N/A). */
1378 COSTS_N_INSNS (3), /* extend_add. */
1379 0 /* idiv (N/A). */
1382 /* LD/ST */
1384 COSTS_N_INSNS (3), /* load. */
1385 COSTS_N_INSNS (3), /* load_sign_extend. */
1386 COSTS_N_INSNS (3), /* ldrd. */
1387 COSTS_N_INSNS (3), /* ldm_1st. */
1388 1, /* ldm_regs_per_insn_1st. */
1389 2, /* ldm_regs_per_insn_subsequent. */
1390 COSTS_N_INSNS (3), /* loadf. */
1391 COSTS_N_INSNS (3), /* loadd. */
1392 0, /* load_unaligned. */
1393 0, /* store. */
1394 0, /* strd. */
1395 0, /* stm_1st. */
1396 1, /* stm_regs_per_insn_1st. */
1397 2, /* stm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (2), /* storef. */
1399 COSTS_N_INSNS (2), /* stored. */
1400 0 /* store_unaligned. */
1403 /* FP SFmode */
1405 COSTS_N_INSNS (17), /* div. */
1406 COSTS_N_INSNS (4), /* mult. */
1407 COSTS_N_INSNS (8), /* mult_addsub. */
1408 COSTS_N_INSNS (8), /* fma. */
1409 COSTS_N_INSNS (4), /* addsub. */
1410 COSTS_N_INSNS (2), /* fpconst. */
1411 COSTS_N_INSNS (2), /* neg. */
1412 COSTS_N_INSNS (2), /* compare. */
1413 COSTS_N_INSNS (4), /* widen. */
1414 COSTS_N_INSNS (4), /* narrow. */
1415 COSTS_N_INSNS (4), /* toint. */
1416 COSTS_N_INSNS (4), /* fromint. */
1417 COSTS_N_INSNS (4) /* roundint. */
1419 /* FP DFmode */
1421 COSTS_N_INSNS (31), /* div. */
1422 COSTS_N_INSNS (4), /* mult. */
1423 COSTS_N_INSNS (8), /* mult_addsub. */
1424 COSTS_N_INSNS (8), /* fma. */
1425 COSTS_N_INSNS (4), /* addsub. */
1426 COSTS_N_INSNS (2), /* fpconst. */
1427 COSTS_N_INSNS (2), /* neg. */
1428 COSTS_N_INSNS (2), /* compare. */
1429 COSTS_N_INSNS (4), /* widen. */
1430 COSTS_N_INSNS (4), /* narrow. */
1431 COSTS_N_INSNS (4), /* toint. */
1432 COSTS_N_INSNS (4), /* fromint. */
1433 COSTS_N_INSNS (4) /* roundint. */
1436 /* Vector */
1438 COSTS_N_INSNS (1) /* alu. */
1442 const struct cpu_cost_table cortexa15_extra_costs =
1444 /* ALU */
1446 0, /* arith. */
1447 0, /* logical. */
1448 0, /* shift. */
1449 0, /* shift_reg. */
1450 COSTS_N_INSNS (1), /* arith_shift. */
1451 COSTS_N_INSNS (1), /* arith_shift_reg. */
1452 COSTS_N_INSNS (1), /* log_shift. */
1453 COSTS_N_INSNS (1), /* log_shift_reg. */
1454 0, /* extend. */
1455 COSTS_N_INSNS (1), /* extend_arith. */
1456 COSTS_N_INSNS (1), /* bfi. */
1457 0, /* bfx. */
1458 0, /* clz. */
1459 0, /* rev. */
1460 0, /* non_exec. */
1461 true /* non_exec_costs_exec. */
1463 /* MULT SImode */
1466 COSTS_N_INSNS (2), /* simple. */
1467 COSTS_N_INSNS (3), /* flag_setting. */
1468 COSTS_N_INSNS (2), /* extend. */
1469 COSTS_N_INSNS (2), /* add. */
1470 COSTS_N_INSNS (2), /* extend_add. */
1471 COSTS_N_INSNS (18) /* idiv. */
1473 /* MULT DImode */
1475 0, /* simple (N/A). */
1476 0, /* flag_setting (N/A). */
1477 COSTS_N_INSNS (3), /* extend. */
1478 0, /* add (N/A). */
1479 COSTS_N_INSNS (3), /* extend_add. */
1480 0 /* idiv (N/A). */
1483 /* LD/ST */
1485 COSTS_N_INSNS (3), /* load. */
1486 COSTS_N_INSNS (3), /* load_sign_extend. */
1487 COSTS_N_INSNS (3), /* ldrd. */
1488 COSTS_N_INSNS (4), /* ldm_1st. */
1489 1, /* ldm_regs_per_insn_1st. */
1490 2, /* ldm_regs_per_insn_subsequent. */
1491 COSTS_N_INSNS (4), /* loadf. */
1492 COSTS_N_INSNS (4), /* loadd. */
1493 0, /* load_unaligned. */
1494 0, /* store. */
1495 0, /* strd. */
1496 COSTS_N_INSNS (1), /* stm_1st. */
1497 1, /* stm_regs_per_insn_1st. */
1498 2, /* stm_regs_per_insn_subsequent. */
1499 0, /* storef. */
1500 0, /* stored. */
1501 0 /* store_unaligned. */
1504 /* FP SFmode */
1506 COSTS_N_INSNS (17), /* div. */
1507 COSTS_N_INSNS (4), /* mult. */
1508 COSTS_N_INSNS (8), /* mult_addsub. */
1509 COSTS_N_INSNS (8), /* fma. */
1510 COSTS_N_INSNS (4), /* addsub. */
1511 COSTS_N_INSNS (2), /* fpconst. */
1512 COSTS_N_INSNS (2), /* neg. */
1513 COSTS_N_INSNS (5), /* compare. */
1514 COSTS_N_INSNS (4), /* widen. */
1515 COSTS_N_INSNS (4), /* narrow. */
1516 COSTS_N_INSNS (4), /* toint. */
1517 COSTS_N_INSNS (4), /* fromint. */
1518 COSTS_N_INSNS (4) /* roundint. */
1520 /* FP DFmode */
1522 COSTS_N_INSNS (31), /* div. */
1523 COSTS_N_INSNS (4), /* mult. */
1524 COSTS_N_INSNS (8), /* mult_addsub. */
1525 COSTS_N_INSNS (8), /* fma. */
1526 COSTS_N_INSNS (4), /* addsub. */
1527 COSTS_N_INSNS (2), /* fpconst. */
1528 COSTS_N_INSNS (2), /* neg. */
1529 COSTS_N_INSNS (2), /* compare. */
1530 COSTS_N_INSNS (4), /* widen. */
1531 COSTS_N_INSNS (4), /* narrow. */
1532 COSTS_N_INSNS (4), /* toint. */
1533 COSTS_N_INSNS (4), /* fromint. */
1534 COSTS_N_INSNS (4) /* roundint. */
1537 /* Vector */
1539 COSTS_N_INSNS (1) /* alu. */
1543 const struct cpu_cost_table v7m_extra_costs =
1545 /* ALU */
1547 0, /* arith. */
1548 0, /* logical. */
1549 0, /* shift. */
1550 0, /* shift_reg. */
1551 0, /* arith_shift. */
1552 COSTS_N_INSNS (1), /* arith_shift_reg. */
1553 0, /* log_shift. */
1554 COSTS_N_INSNS (1), /* log_shift_reg. */
1555 0, /* extend. */
1556 COSTS_N_INSNS (1), /* extend_arith. */
1557 0, /* bfi. */
1558 0, /* bfx. */
1559 0, /* clz. */
1560 0, /* rev. */
1561 COSTS_N_INSNS (1), /* non_exec. */
1562 false /* non_exec_costs_exec. */
1565 /* MULT SImode */
1567 COSTS_N_INSNS (1), /* simple. */
1568 COSTS_N_INSNS (1), /* flag_setting. */
1569 COSTS_N_INSNS (2), /* extend. */
1570 COSTS_N_INSNS (1), /* add. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (8) /* idiv. */
1574 /* MULT DImode */
1576 0, /* simple (N/A). */
1577 0, /* flag_setting (N/A). */
1578 COSTS_N_INSNS (2), /* extend. */
1579 0, /* add (N/A). */
1580 COSTS_N_INSNS (3), /* extend_add. */
1581 0 /* idiv (N/A). */
1584 /* LD/ST */
1586 COSTS_N_INSNS (2), /* load. */
1587 0, /* load_sign_extend. */
1588 COSTS_N_INSNS (3), /* ldrd. */
1589 COSTS_N_INSNS (2), /* ldm_1st. */
1590 1, /* ldm_regs_per_insn_1st. */
1591 1, /* ldm_regs_per_insn_subsequent. */
1592 COSTS_N_INSNS (2), /* loadf. */
1593 COSTS_N_INSNS (3), /* loadd. */
1594 COSTS_N_INSNS (1), /* load_unaligned. */
1595 COSTS_N_INSNS (2), /* store. */
1596 COSTS_N_INSNS (3), /* strd. */
1597 COSTS_N_INSNS (2), /* stm_1st. */
1598 1, /* stm_regs_per_insn_1st. */
1599 1, /* stm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* storef. */
1601 COSTS_N_INSNS (3), /* stored. */
1602 COSTS_N_INSNS (1) /* store_unaligned. */
1605 /* FP SFmode */
1607 COSTS_N_INSNS (7), /* div. */
1608 COSTS_N_INSNS (2), /* mult. */
1609 COSTS_N_INSNS (5), /* mult_addsub. */
1610 COSTS_N_INSNS (3), /* fma. */
1611 COSTS_N_INSNS (1), /* addsub. */
1612 0, /* fpconst. */
1613 0, /* neg. */
1614 0, /* compare. */
1615 0, /* widen. */
1616 0, /* narrow. */
1617 0, /* toint. */
1618 0, /* fromint. */
1619 0 /* roundint. */
1621 /* FP DFmode */
1623 COSTS_N_INSNS (15), /* div. */
1624 COSTS_N_INSNS (5), /* mult. */
1625 COSTS_N_INSNS (7), /* mult_addsub. */
1626 COSTS_N_INSNS (7), /* fma. */
1627 COSTS_N_INSNS (3), /* addsub. */
1628 0, /* fpconst. */
1629 0, /* neg. */
1630 0, /* compare. */
1631 0, /* widen. */
1632 0, /* narrow. */
1633 0, /* toint. */
1634 0, /* fromint. */
1635 0 /* roundint. */
1638 /* Vector */
1640 COSTS_N_INSNS (1) /* alu. */
1644 const struct tune_params arm_slowmul_tune =
1646 arm_slowmul_rtx_costs,
1647 NULL,
1648 NULL, /* Sched adj cost. */
1649 3, /* Constant limit. */
1650 5, /* Max cond insns. */
1651 ARM_PREFETCH_NOT_BENEFICIAL,
1652 true, /* Prefer constant pool. */
1653 arm_default_branch_cost,
1654 false, /* Prefer LDRD/STRD. */
1655 {true, true}, /* Prefer non short circuit. */
1656 &arm_default_vec_cost, /* Vectorizer costs. */
1657 false, /* Prefer Neon for 64-bits bitops. */
1658 false, false, /* Prefer 32-bit encodings. */
1659 false, /* Prefer Neon for stringops. */
1660 8 /* Maximum insns to inline memset. */
1663 const struct tune_params arm_fastmul_tune =
1665 arm_fastmul_rtx_costs,
1666 NULL,
1667 NULL, /* Sched adj cost. */
1668 1, /* Constant limit. */
1669 5, /* Max cond insns. */
1670 ARM_PREFETCH_NOT_BENEFICIAL,
1671 true, /* Prefer constant pool. */
1672 arm_default_branch_cost,
1673 false, /* Prefer LDRD/STRD. */
1674 {true, true}, /* Prefer non short circuit. */
1675 &arm_default_vec_cost, /* Vectorizer costs. */
1676 false, /* Prefer Neon for 64-bits bitops. */
1677 false, false, /* Prefer 32-bit encodings. */
1678 false, /* Prefer Neon for stringops. */
1679 8 /* Maximum insns to inline memset. */
1682 /* StrongARM has early execution of branches, so a sequence that is worth
1683 skipping is shorter. Set max_insns_skipped to a lower value. */
1685 const struct tune_params arm_strongarm_tune =
1687 arm_fastmul_rtx_costs,
1688 NULL,
1689 NULL, /* Sched adj cost. */
1690 1, /* Constant limit. */
1691 3, /* Max cond insns. */
1692 ARM_PREFETCH_NOT_BENEFICIAL,
1693 true, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 false, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false, /* Prefer Neon for 64-bits bitops. */
1699 false, false, /* Prefer 32-bit encodings. */
1700 false, /* Prefer Neon for stringops. */
1701 8 /* Maximum insns to inline memset. */
1704 const struct tune_params arm_xscale_tune =
1706 arm_xscale_rtx_costs,
1707 NULL,
1708 xscale_sched_adjust_cost,
1709 2, /* Constant limit. */
1710 3, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false, /* Prefer 32-bit encodings. */
1719 false, /* Prefer Neon for stringops. */
1720 8 /* Maximum insns to inline memset. */
1723 const struct tune_params arm_9e_tune =
1725 arm_9e_rtx_costs,
1726 NULL,
1727 NULL, /* Sched adj cost. */
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false, /* Prefer Neon for 64-bits bitops. */
1737 false, false, /* Prefer 32-bit encodings. */
1738 false, /* Prefer Neon for stringops. */
1739 8 /* Maximum insns to inline memset. */
1742 const struct tune_params arm_v6t2_tune =
1744 arm_9e_rtx_costs,
1745 NULL,
1746 NULL, /* Sched adj cost. */
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 false, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false, /* Prefer Neon for 64-bits bitops. */
1756 false, false, /* Prefer 32-bit encodings. */
1757 false, /* Prefer Neon for stringops. */
1758 8 /* Maximum insns to inline memset. */
1761 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1762 const struct tune_params arm_cortex_tune =
1764 arm_9e_rtx_costs,
1765 &generic_extra_costs,
1766 NULL, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1781 const struct tune_params arm_cortex_a8_tune =
1783 arm_9e_rtx_costs,
1784 &cortexa8_extra_costs,
1785 NULL, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1800 const struct tune_params arm_cortex_a7_tune =
1802 arm_9e_rtx_costs,
1803 &cortexa7_extra_costs,
1804 NULL,
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1819 const struct tune_params arm_cortex_a15_tune =
1821 arm_9e_rtx_costs,
1822 &cortexa15_extra_costs,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1838 const struct tune_params arm_cortex_a53_tune =
1840 arm_9e_rtx_costs,
1841 &cortexa53_extra_costs,
1842 NULL, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1857 const struct tune_params arm_cortex_a57_tune =
1859 arm_9e_rtx_costs,
1860 &cortexa57_extra_costs,
1861 NULL, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
1876 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1877 less appealing. Set max_insns_skipped to a low value. */
1879 const struct tune_params arm_cortex_a5_tune =
1881 arm_9e_rtx_costs,
1882 &cortexa5_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 1, /* Constant limit. */
1885 1, /* Max cond insns. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 false, /* Prefer constant pool. */
1888 arm_cortex_a5_branch_cost,
1889 false, /* Prefer LDRD/STRD. */
1890 {false, false}, /* Prefer non short circuit. */
1891 &arm_default_vec_cost, /* Vectorizer costs. */
1892 false, /* Prefer Neon for 64-bits bitops. */
1893 false, false, /* Prefer 32-bit encodings. */
1894 true, /* Prefer Neon for stringops. */
1895 8 /* Maximum insns to inline memset. */
1898 const struct tune_params arm_cortex_a9_tune =
1900 arm_9e_rtx_costs,
1901 &cortexa9_extra_costs,
1902 cortex_a9_sched_adjust_cost,
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_BENEFICIAL(4,32,32),
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 false, /* Prefer Neon for stringops. */
1914 8 /* Maximum insns to inline memset. */
1917 const struct tune_params arm_cortex_a12_tune =
1919 arm_9e_rtx_costs,
1920 &cortexa12_extra_costs,
1921 NULL,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 ARM_PREFETCH_BENEFICIAL(4,32,32),
1925 false, /* Prefer constant pool. */
1926 arm_default_branch_cost,
1927 true, /* Prefer LDRD/STRD. */
1928 {true, true}, /* Prefer non short circuit. */
1929 &arm_default_vec_cost, /* Vectorizer costs. */
1930 false, /* Prefer Neon for 64-bits bitops. */
1931 false, false, /* Prefer 32-bit encodings. */
1932 true, /* Prefer Neon for stringops. */
1933 8 /* Maximum insns to inline memset. */
1936 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
1937 single cycle to execute. An LDR from the constant pool also takes two cycles
1938 to execute, but mildly increases pipelining opportunity (consecutive
1939 loads/stores can be pipelined together, saving one cycle), and may also
1940 improve icache utilisation. Hence we prefer the constant pool for such
1941 processors. */
1943 const struct tune_params arm_v7m_tune =
1945 arm_9e_rtx_costs,
1946 &v7m_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 1, /* Constant limit. */
1949 2, /* Max cond insns. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 true, /* Prefer constant pool. */
1952 arm_cortex_m_branch_cost,
1953 false, /* Prefer LDRD/STRD. */
1954 {false, false}, /* Prefer non short circuit. */
1955 &arm_default_vec_cost, /* Vectorizer costs. */
1956 false, /* Prefer Neon for 64-bits bitops. */
1957 false, false, /* Prefer 32-bit encodings. */
1958 false, /* Prefer Neon for stringops. */
1959 8 /* Maximum insns to inline memset. */
1962 /* Cortex-M7 tuning. */
1964 const struct tune_params arm_cortex_m7_tune =
1966 arm_9e_rtx_costs,
1967 &v7m_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 0, /* Constant limit. */
1970 0, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 true, /* Prefer constant pool. */
1973 arm_cortex_m_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {true, true}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 false, /* Prefer Neon for stringops. */
1980 8 /* Maximum insns to inline memset. */
1983 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1984 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1985 const struct tune_params arm_v6m_tune =
1987 arm_9e_rtx_costs,
1988 NULL,
1989 NULL, /* Sched adj cost. */
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {false, false}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8 /* Maximum insns to inline memset. */
2004 const struct tune_params arm_fa726te_tune =
2006 arm_9e_rtx_costs,
2007 NULL,
2008 fa726te_sched_adjust_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 true, /* Prefer constant pool. */
2013 arm_default_branch_cost,
2014 false, /* Prefer LDRD/STRD. */
2015 {true, true}, /* Prefer non short circuit. */
2016 &arm_default_vec_cost, /* Vectorizer costs. */
2017 false, /* Prefer Neon for 64-bits bitops. */
2018 false, false, /* Prefer 32-bit encodings. */
2019 false, /* Prefer Neon for stringops. */
2020 8 /* Maximum insns to inline memset. */
2024 /* Not all of these give usefully different compilation alternatives,
2025 but there is no simple way of generalizing them. */
2026 static const struct processors all_cores[] =
2028 /* ARM Cores */
2029 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2030 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2031 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2032 #include "arm-cores.def"
2033 #undef ARM_CORE
2034 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
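/* For illustration, a hypothetical arm-cores.def entry such as

     ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand under the ARM_CORE macro above to roughly

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   i.e. the architecture name is stringized, the per-architecture flags
   are OR-ed in via FL_FOR_ARCH##ARCH, and COSTS picks one of the
   tune_params tables defined earlier in this file.  The actual entries
   and flags live in arm-cores.def.  */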
2037 static const struct processors all_architectures[] =
2039 /* ARM Architectures */
2040 /* We don't specify tuning costs here as it will be figured out
2041 from the core. */
2043 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2044 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2045 #include "arm-arches.def"
2046 #undef ARM_ARCH
2047 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2051 /* These are populated as command-line arguments are processed, or NULL
2052 if not specified. */
2053 static const struct processors *arm_selected_arch;
2054 static const struct processors *arm_selected_cpu;
2055 static const struct processors *arm_selected_tune;
2057 /* The name of the preprocessor macro to define for this architecture. */
2059 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2061 /* Available values for -mfpu=. */
2063 static const struct arm_fpu_desc all_fpus[] =
2065 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2066 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2067 #include "arm-fpus.def"
2068 #undef ARM_FPU
2072 /* Supported TLS relocations. */
2074 enum tls_reloc {
2075 TLS_GD32,
2076 TLS_LDM32,
2077 TLS_LDO32,
2078 TLS_IE32,
2079 TLS_LE32,
2080 TLS_DESCSEQ /* GNU scheme */
2083 /* The maximum number of insns to be used when loading a constant. */
2084 inline static int
2085 arm_constant_limit (bool size_p)
2087 return size_p ? 1 : current_tune->constant_limit;
2090 /* Emit an insn that's a simple single-set. Both the operands must be known
2091 to be valid. */
2092 inline static rtx_insn *
2093 emit_set_insn (rtx x, rtx y)
2095 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2098 /* Return the number of bits set in VALUE. */
2099 static unsigned
2100 bit_count (unsigned long value)
2102 unsigned long count = 0;
2104 while (value)
2106 count++;
2107 value &= value - 1; /* Clear the least-significant set bit. */
2110 return count;
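/* The "value &= value - 1" step above is the classic clear-lowest-set-bit
   trick: subtracting one flips the lowest set bit and every bit below it,
   so the AND removes exactly that bit.  Worked example:

     value             = 0b101100
     value - 1         = 0b101011
     value & (value-1) = 0b101000

   Each iteration therefore drops one set bit, and the loop body runs once
   per bit set in VALUE.  */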
2113 typedef struct
2115 machine_mode mode;
2116 const char *name;
2117 } arm_fixed_mode_set;
2119 /* A small helper for setting fixed-point libfuncs. */
2121 static void
2122 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2123 const char *funcname, const char *modename,
2124 int num_suffix)
2126 char buffer[50];
2128 if (num_suffix == 0)
2129 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2130 else
2131 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2133 set_optab_libfunc (optable, mode, buffer);
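/* Given the sprintf formats above, the generated names carry a "__gnu_"
   prefix (keeping them out of the AEABI "__aeabi_" namespace) followed by
   the operation, the mode suffix and the operand count.  For example
   (names derived from the format strings, for illustration only):

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
       registers "__gnu_addqq3";
     arm_set_fixed_optab_libfunc (neg_optab, SAmode, "neg", "sa", 2)
       registers "__gnu_negsa2".  */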
2136 static void
2137 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2138 machine_mode from, const char *funcname,
2139 const char *toname, const char *fromname)
2141 char buffer[50];
2142 const char *maybe_suffix_2 = "";
2144 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2145 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2146 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2147 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2148 maybe_suffix_2 = "2";
2150 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2151 maybe_suffix_2);
2153 set_conv_libfunc (optable, to, from, buffer);
2156 /* Set up library functions unique to ARM. */
2158 static void
2159 arm_init_libfuncs (void)
2161 /* For Linux, we have access to kernel support for atomic operations. */
2162 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2163 init_sync_libfuncs (2 * UNITS_PER_WORD);
2165 /* There are no special library functions unless we are using the
2166 ARM BPABI. */
2167 if (!TARGET_BPABI)
2168 return;
2170 /* The functions below are described in Section 4 of the "Run-Time
2171 ABI for the ARM architecture", Version 1.0. */
2173 /* Double-precision floating-point arithmetic. Table 2. */
2174 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2175 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2176 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2177 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2178 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2180 /* Double-precision comparisons. Table 3. */
2181 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2182 set_optab_libfunc (ne_optab, DFmode, NULL);
2183 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2184 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2185 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2186 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2187 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2189 /* Single-precision floating-point arithmetic. Table 4. */
2190 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2191 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2192 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2193 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2194 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2196 /* Single-precision comparisons. Table 5. */
2197 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2198 set_optab_libfunc (ne_optab, SFmode, NULL);
2199 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2200 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2201 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2202 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2203 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2205 /* Floating-point to integer conversions. Table 6. */
2206 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2207 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2208 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2209 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2210 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2211 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2213 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2215 /* Conversions between floating types. Table 7. */
2216 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2217 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2219 /* Integer to floating-point conversions. Table 8. */
2220 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2222 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2223 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2224 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2225 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2226 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2227 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2229 /* Long long. Table 9. */
2230 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2231 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2232 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2233 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2234 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2235 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2236 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2237 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2239 /* Integer (32/32->32) division. \S 4.3.1. */
2240 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2241 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2243 /* The divmod functions are designed so that they can be used for
2244 plain division, even though they return both the quotient and the
2245 remainder. The quotient is returned in the usual location (i.e.,
2246 r0 for SImode, {r0, r1} for DImode), just as would be expected
2247 for an ordinary division routine. Because the AAPCS calling
2248 conventions specify that all of { r0, r1, r2, r3 } are
2249 call-clobbered registers, there is no need to tell the compiler
2250 explicitly that those registers are clobbered by these
2251 routines. */
2252 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2253 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2255 /* For SImode division the ABI provides div-without-mod routines,
2256 which are faster. */
2257 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2258 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
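/* Per the Run-Time ABI, __aeabi_idiv takes the numerator in r0 and the
   denominator in r1 and returns the quotient in r0.  With the optabs set
   up above, a plain C division on a BPABI target without hardware divide,
   e.g.

     int quot (int a, int b) { return a / b; }

   therefore compiles down to little more than a call to __aeabi_idiv.  */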
2260 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2261 divmod libcalls instead. */
2262 set_optab_libfunc (smod_optab, DImode, NULL);
2263 set_optab_libfunc (umod_optab, DImode, NULL);
2264 set_optab_libfunc (smod_optab, SImode, NULL);
2265 set_optab_libfunc (umod_optab, SImode, NULL);
2267 /* Half-precision float operations. The compiler handles all operations
2268 with NULL libfuncs by converting to SFmode. */
2269 switch (arm_fp16_format)
2271 case ARM_FP16_FORMAT_IEEE:
2272 case ARM_FP16_FORMAT_ALTERNATIVE:
2274 /* Conversions. */
2275 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2276 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2277 ? "__gnu_f2h_ieee"
2278 : "__gnu_f2h_alternative"));
2279 set_conv_libfunc (sext_optab, SFmode, HFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_h2f_ieee"
2282 : "__gnu_h2f_alternative"));
2284 /* Arithmetic. */
2285 set_optab_libfunc (add_optab, HFmode, NULL);
2286 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2287 set_optab_libfunc (smul_optab, HFmode, NULL);
2288 set_optab_libfunc (neg_optab, HFmode, NULL);
2289 set_optab_libfunc (sub_optab, HFmode, NULL);
2291 /* Comparisons. */
2292 set_optab_libfunc (eq_optab, HFmode, NULL);
2293 set_optab_libfunc (ne_optab, HFmode, NULL);
2294 set_optab_libfunc (lt_optab, HFmode, NULL);
2295 set_optab_libfunc (le_optab, HFmode, NULL);
2296 set_optab_libfunc (ge_optab, HFmode, NULL);
2297 set_optab_libfunc (gt_optab, HFmode, NULL);
2298 set_optab_libfunc (unord_optab, HFmode, NULL);
2299 break;
2301 default:
2302 break;
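/* With the HFmode optabs cleared above, __fp16 arithmetic is performed in
   SFmode.  Conceptually,

     __fp16 a, b;
     __fp16 c = a + b;

   becomes roughly

     c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));

   (or the _alternative variants, depending on arm_fp16_format).  */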
2305 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2307 const arm_fixed_mode_set fixed_arith_modes[] =
2309 { QQmode, "qq" },
2310 { UQQmode, "uqq" },
2311 { HQmode, "hq" },
2312 { UHQmode, "uhq" },
2313 { SQmode, "sq" },
2314 { USQmode, "usq" },
2315 { DQmode, "dq" },
2316 { UDQmode, "udq" },
2317 { TQmode, "tq" },
2318 { UTQmode, "utq" },
2319 { HAmode, "ha" },
2320 { UHAmode, "uha" },
2321 { SAmode, "sa" },
2322 { USAmode, "usa" },
2323 { DAmode, "da" },
2324 { UDAmode, "uda" },
2325 { TAmode, "ta" },
2326 { UTAmode, "uta" }
2328 const arm_fixed_mode_set fixed_conv_modes[] =
2330 { QQmode, "qq" },
2331 { UQQmode, "uqq" },
2332 { HQmode, "hq" },
2333 { UHQmode, "uhq" },
2334 { SQmode, "sq" },
2335 { USQmode, "usq" },
2336 { DQmode, "dq" },
2337 { UDQmode, "udq" },
2338 { TQmode, "tq" },
2339 { UTQmode, "utq" },
2340 { HAmode, "ha" },
2341 { UHAmode, "uha" },
2342 { SAmode, "sa" },
2343 { USAmode, "usa" },
2344 { DAmode, "da" },
2345 { UDAmode, "uda" },
2346 { TAmode, "ta" },
2347 { UTAmode, "uta" },
2348 { QImode, "qi" },
2349 { HImode, "hi" },
2350 { SImode, "si" },
2351 { DImode, "di" },
2352 { TImode, "ti" },
2353 { SFmode, "sf" },
2354 { DFmode, "df" }
2356 unsigned int i, j;
2358 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2360 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2361 "add", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2363 "ssadd", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2365 "usadd", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2367 "sub", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2369 "sssub", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2371 "ussub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2373 "mul", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2375 "ssmul", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2377 "usmul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2379 "div", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2381 "udiv", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2383 "ssdiv", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2385 "usdiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2387 "neg", fixed_arith_modes[i].name, 2);
2388 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2389 "ssneg", fixed_arith_modes[i].name, 2);
2390 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2391 "usneg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2393 "ashl", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2395 "ashr", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2397 "lshr", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2399 "ssashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2401 "usashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2403 "cmp", fixed_arith_modes[i].name, 2);
2406 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2407 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2409 if (i == j
2410 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2411 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2412 continue;
2414 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2415 fixed_conv_modes[j].mode, "fract",
2416 fixed_conv_modes[i].name,
2417 fixed_conv_modes[j].name);
2418 arm_set_fixed_conv_libfunc (satfract_optab,
2419 fixed_conv_modes[i].mode,
2420 fixed_conv_modes[j].mode, "satfract",
2421 fixed_conv_modes[i].name,
2422 fixed_conv_modes[j].name);
2423 arm_set_fixed_conv_libfunc (fractuns_optab,
2424 fixed_conv_modes[i].mode,
2425 fixed_conv_modes[j].mode, "fractuns",
2426 fixed_conv_modes[i].name,
2427 fixed_conv_modes[j].name);
2428 arm_set_fixed_conv_libfunc (satfractuns_optab,
2429 fixed_conv_modes[i].mode,
2430 fixed_conv_modes[j].mode, "satfractuns",
2431 fixed_conv_modes[i].name,
2432 fixed_conv_modes[j].name);
2436 if (TARGET_AAPCS_BASED)
2437 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2440 /* On AAPCS systems, this is the "struct __va_list". */
2441 static GTY(()) tree va_list_type;
2443 /* Return the type to use as __builtin_va_list. */
2444 static tree
2445 arm_build_builtin_va_list (void)
2447 tree va_list_name;
2448 tree ap_field;
2450 if (!TARGET_AAPCS_BASED)
2451 return std_build_builtin_va_list ();
2453 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2454 defined as:
2456 struct __va_list
2458 void *__ap;
2461 The C Library ABI further reinforces this definition in \S
2462 4.1.
2464 We must follow this definition exactly. The structure tag
2465 name is visible in C++ mangled names, and thus forms a part
2466 of the ABI. The field name may be used by people who
2467 #include <stdarg.h>. */
2468 /* Create the type. */
2469 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2470 /* Give it the required name. */
2471 va_list_name = build_decl (BUILTINS_LOCATION,
2472 TYPE_DECL,
2473 get_identifier ("__va_list"),
2474 va_list_type);
2475 DECL_ARTIFICIAL (va_list_name) = 1;
2476 TYPE_NAME (va_list_type) = va_list_name;
2477 TYPE_STUB_DECL (va_list_type) = va_list_name;
2478 /* Create the __ap field. */
2479 ap_field = build_decl (BUILTINS_LOCATION,
2480 FIELD_DECL,
2481 get_identifier ("__ap"),
2482 ptr_type_node);
2483 DECL_ARTIFICIAL (ap_field) = 1;
2484 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2485 TYPE_FIELDS (va_list_type) = ap_field;
2486 /* Compute its layout. */
2487 layout_type (va_list_type);
2489 return va_list_type;
2492 /* Return an expression of type "void *" pointing to the next
2493 available argument in a variable-argument list. VALIST is the
2494 user-level va_list object, of type __builtin_va_list. */
2495 static tree
2496 arm_extract_valist_ptr (tree valist)
2498 if (TREE_TYPE (valist) == error_mark_node)
2499 return error_mark_node;
2501 /* On an AAPCS target, the pointer is stored within "struct
2502 va_list". */
2503 if (TARGET_AAPCS_BASED)
2505 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2506 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2507 valist, ap_field, NULL_TREE);
2510 return valist;
2513 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2514 static void
2515 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2517 valist = arm_extract_valist_ptr (valist);
2518 std_expand_builtin_va_start (valist, nextarg);
2521 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2522 static tree
2523 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2524 gimple_seq *post_p)
2526 valist = arm_extract_valist_ptr (valist);
2527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2530 /* Fix up any incompatible options that the user has specified. */
2531 static void
2532 arm_option_override (void)
2534 if (global_options_set.x_arm_arch_option)
2535 arm_selected_arch = &all_architectures[arm_arch_option];
2537 if (global_options_set.x_arm_cpu_option)
2539 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2540 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2543 if (global_options_set.x_arm_tune_option)
2544 arm_selected_tune = &all_cores[(int) arm_tune_option];
2546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2547 SUBTARGET_OVERRIDE_OPTIONS;
2548 #endif
2550 if (arm_selected_arch)
2552 if (arm_selected_cpu)
2554 /* Check for conflict between mcpu and march. */
2555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2558 arm_selected_cpu->name, arm_selected_arch->name);
2559 /* -march wins for code generation.
2560 -mcpu wins for default tuning. */
2561 if (!arm_selected_tune)
2562 arm_selected_tune = arm_selected_cpu;
2564 arm_selected_cpu = arm_selected_arch;
2566 else
2567 /* -mcpu wins. */
2568 arm_selected_arch = NULL;
2570 else
2571 /* Pick a CPU based on the architecture. */
2572 arm_selected_cpu = arm_selected_arch;
2575 /* If the user did not specify a processor, choose one for them. */
2576 if (!arm_selected_cpu)
2578 const struct processors * sel;
2579 unsigned int sought;
2581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2582 if (!arm_selected_cpu->name)
2584 #ifdef SUBTARGET_CPU_DEFAULT
2585 /* Use the subtarget default CPU if none was specified by
2586 configure. */
2587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2588 #endif
2589 /* Default to ARM6. */
2590 if (!arm_selected_cpu->name)
2591 arm_selected_cpu = &all_cores[arm6];
2594 sel = arm_selected_cpu;
2595 insn_flags = sel->flags;
2597 /* Now check to see if the user has specified some command-line
2598 switches that require certain abilities from the CPU. */
2599 sought = 0;
2601 if (TARGET_INTERWORK || TARGET_THUMB)
2603 sought |= (FL_THUMB | FL_MODE32);
2605 /* There are no ARM processors that support both APCS-26 and
2606 interworking. Therefore we force FL_MODE26 to be removed
2607 from insn_flags here (if it was set), so that the search
2608 below will always be able to find a compatible processor. */
2609 insn_flags &= ~FL_MODE26;
2612 if (sought != 0 && ((sought & insn_flags) != sought))
2614 /* Try to locate a CPU type that supports all of the abilities
2615 of the default CPU, plus the extra abilities requested by
2616 the user. */
2617 for (sel = all_cores; sel->name != NULL; sel++)
2618 if ((sel->flags & sought) == (sought | insn_flags))
2619 break;
2621 if (sel->name == NULL)
2623 unsigned current_bit_count = 0;
2624 const struct processors * best_fit = NULL;
2626 /* Ideally we would like to issue an error message here
2627 saying that it was not possible to find a CPU compatible
2628 with the default CPU, but which also supports the command
2629 line options specified by the programmer, and so they
2630 ought to use the -mcpu=<name> command line option to
2631 override the default CPU type.
2633 If we cannot find a cpu that has both the
2634 characteristics of the default cpu and the given
2635 command line options we scan the array again looking
2636 for a best match. */
2637 for (sel = all_cores; sel->name != NULL; sel++)
2638 if ((sel->flags & sought) == sought)
2640 unsigned count;
2642 count = bit_count (sel->flags & insn_flags);
2644 if (count >= current_bit_count)
2646 best_fit = sel;
2647 current_bit_count = count;
2651 gcc_assert (best_fit);
2652 sel = best_fit;
2655 arm_selected_cpu = sel;
2659 gcc_assert (arm_selected_cpu);
2660 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2665 insn_flags = arm_selected_cpu->flags;
2666 arm_base_arch = arm_selected_cpu->base_arch;
2668 arm_tune = arm_selected_tune->core;
2669 tune_flags = arm_selected_tune->flags;
2670 current_tune = arm_selected_tune->tune;
2672 /* Make sure that the processor choice does not conflict with any of the
2673 other command line choices. */
2674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2675 error ("target CPU does not support ARM mode");
2677 /* BPABI targets use linker tricks to allow interworking on cores
2678 without thumb support. */
2679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2681 warning (0, "target CPU does not support interworking" );
2682 target_flags &= ~MASK_INTERWORK;
2685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2687 warning (0, "target CPU does not support THUMB instructions");
2688 target_flags &= ~MASK_THUMB;
2691 if (TARGET_APCS_FRAME && TARGET_THUMB)
2693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2694 target_flags &= ~MASK_APCS_FRAME;
2697 /* Callee super interworking implies thumb interworking. Adding
2698 this to the flags here simplifies the logic elsewhere. */
2699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2700 target_flags |= MASK_INTERWORK;
2702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2703 from here where no function is being compiled currently. */
2704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2713 target_flags |= MASK_APCS_FRAME;
2716 if (TARGET_POKE_FUNCTION_NAME)
2717 target_flags |= MASK_APCS_FRAME;
2719 if (TARGET_APCS_REENT && flag_pic)
2720 error ("-fpic and -mapcs-reent are incompatible");
2722 if (TARGET_APCS_REENT)
2723 warning (0, "APCS reentrant code not supported. Ignored");
2725 /* If this target is normally configured to use APCS frames, warn if they
2726 are turned off and debugging is turned on. */
2727 if (TARGET_ARM
2728 && write_symbols != NO_DEBUG
2729 && !TARGET_APCS_FRAME
2730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2733 if (TARGET_APCS_FLOAT)
2734 warning (0, "passing floating point arguments in fp regs not yet supported");
2736 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2737 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2738 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2739 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2740 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2741 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2742 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2743 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2744 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2745 arm_arch6m = arm_arch6 && !arm_arch_notm;
2746 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2747 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2748 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2749 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2750 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2752 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2753 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2754 thumb_code = TARGET_ARM == 0;
2755 thumb1_code = TARGET_THUMB1 != 0;
2756 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2757 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2758 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2759 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2760 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2761 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2762 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2763 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2764 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2765 if (arm_restrict_it == 2)
2766 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2768 if (!TARGET_THUMB2)
2769 arm_restrict_it = 0;
2771 /* If we are not using the default (ARM mode) section anchor offset
2772 ranges, then set the correct ranges now. */
2773 if (TARGET_THUMB1)
2775 /* Thumb-1 LDR instructions cannot have negative offsets.
2776 Permissible positive offset ranges are 5-bit (for byte loads),
2777 6-bit (for halfword loads), or 7-bit (for word loads).
2778 Empirical results suggest a 7-bit anchor range gives the best
2779 overall code size. */
2780 targetm.min_anchor_offset = 0;
2781 targetm.max_anchor_offset = 127;
2783 else if (TARGET_THUMB2)
2785 /* The minimum is set such that the total size of the block
2786 for a particular anchor is 248 + 1 + 4095 bytes, which is
2787 divisible by eight, ensuring natural spacing of anchors. */
2788 targetm.min_anchor_offset = -248;
2789 targetm.max_anchor_offset = 4095;
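/* Checking the arithmetic in the comment above: the range -248 .. 4095
   covers 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, so each anchor
   block is indeed a whole number of eight-byte units.  */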
2792 /* V5 code we generate is completely interworking capable, so we turn off
2793 TARGET_INTERWORK here to avoid many tests later on. */
2795 /* XXX However, we must pass the right pre-processor defines to CPP
2796 or GLD can get confused. This is a hack. */
2797 if (TARGET_INTERWORK)
2798 arm_cpp_interwork = 1;
2800 if (arm_arch5)
2801 target_flags &= ~MASK_INTERWORK;
2803 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2804 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2806 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2807 error ("iwmmxt abi requires an iwmmxt capable cpu");
2809 if (!global_options_set.x_arm_fpu_index)
2811 const char *target_fpu_name;
2812 bool ok;
2814 #ifdef FPUTYPE_DEFAULT
2815 target_fpu_name = FPUTYPE_DEFAULT;
2816 #else
2817 target_fpu_name = "vfp";
2818 #endif
2820 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2821 CL_TARGET);
2822 gcc_assert (ok);
2825 arm_fpu_desc = &all_fpus[arm_fpu_index];
2827 if (TARGET_NEON && !arm_arch7)
2828 error ("target CPU does not support NEON");
2830 switch (arm_fpu_desc->model)
2832 case ARM_FP_MODEL_VFP:
2833 arm_fpu_attr = FPU_VFP;
2834 break;
2836 default:
2837 gcc_unreachable();
2840 if (TARGET_AAPCS_BASED)
2842 if (TARGET_CALLER_INTERWORKING)
2843 error ("AAPCS does not support -mcaller-super-interworking");
2844 else
2845 if (TARGET_CALLEE_INTERWORKING)
2846 error ("AAPCS does not support -mcallee-super-interworking");
2849 /* iWMMXt and NEON are incompatible. */
2850 if (TARGET_IWMMXT && TARGET_NEON)
2851 error ("iWMMXt and NEON are incompatible");
2853 /* iWMMXt unsupported under Thumb mode. */
2854 if (TARGET_THUMB && TARGET_IWMMXT)
2855 error ("iWMMXt unsupported under Thumb mode");
2857 /* __fp16 support currently assumes the core has ldrh. */
2858 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2859 sorry ("__fp16 and no ldrh");
2861 /* If soft-float is specified then don't use FPU. */
2862 if (TARGET_SOFT_FLOAT)
2863 arm_fpu_attr = FPU_NONE;
2865 if (TARGET_AAPCS_BASED)
2867 if (arm_abi == ARM_ABI_IWMMXT)
2868 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2869 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2870 && TARGET_HARD_FLOAT
2871 && TARGET_VFP)
2872 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2873 else
2874 arm_pcs_default = ARM_PCS_AAPCS;
2876 else
2878 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2879 sorry ("-mfloat-abi=hard and VFP");
2881 if (arm_abi == ARM_ABI_APCS)
2882 arm_pcs_default = ARM_PCS_APCS;
2883 else
2884 arm_pcs_default = ARM_PCS_ATPCS;
2887 /* For arm2/3 there is no need to do any scheduling if we are doing
2888 software floating-point. */
2889 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2890 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2892 /* Use the cp15 method if it is available. */
2893 if (target_thread_pointer == TP_AUTO)
2895 if (arm_arch6k && !TARGET_THUMB1)
2896 target_thread_pointer = TP_CP15;
2897 else
2898 target_thread_pointer = TP_SOFT;
2901 if (TARGET_HARD_TP && TARGET_THUMB1)
2902 error ("can not use -mtp=cp15 with 16-bit Thumb");
2904 /* Override the default structure alignment for AAPCS ABI. */
2905 if (!global_options_set.x_arm_structure_size_boundary)
2907 if (TARGET_AAPCS_BASED)
2908 arm_structure_size_boundary = 8;
2910 else
2912 if (arm_structure_size_boundary != 8
2913 && arm_structure_size_boundary != 32
2914 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2916 if (ARM_DOUBLEWORD_ALIGN)
2917 warning (0,
2918 "structure size boundary can only be set to 8, 32 or 64");
2919 else
2920 warning (0, "structure size boundary can only be set to 8 or 32");
2921 arm_structure_size_boundary
2922 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2926 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2928 error ("RTP PIC is incompatible with Thumb");
2929 flag_pic = 0;
2932 /* If stack checking is disabled, we can use r10 as the PIC register,
2933 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2934 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2936 if (TARGET_VXWORKS_RTP)
2937 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2938 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2941 if (flag_pic && TARGET_VXWORKS_RTP)
2942 arm_pic_register = 9;
2944 if (arm_pic_register_string != NULL)
2946 int pic_register = decode_reg_name (arm_pic_register_string);
2948 if (!flag_pic)
2949 warning (0, "-mpic-register= is useless without -fpic");
2951 /* Prevent the user from choosing an obviously stupid PIC register. */
2952 else if (pic_register < 0 || call_used_regs[pic_register]
2953 || pic_register == HARD_FRAME_POINTER_REGNUM
2954 || pic_register == STACK_POINTER_REGNUM
2955 || pic_register >= PC_REGNUM
2956 || (TARGET_VXWORKS_RTP
2957 && (unsigned int) pic_register != arm_pic_register))
2958 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2959 else
2960 arm_pic_register = pic_register;
2963 if (TARGET_VXWORKS_RTP
2964 && !global_options_set.x_arm_pic_data_is_text_relative)
2965 arm_pic_data_is_text_relative = 0;
2967 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2968 if (fix_cm3_ldrd == 2)
2970 if (arm_selected_cpu->core == cortexm3)
2971 fix_cm3_ldrd = 1;
2972 else
2973 fix_cm3_ldrd = 0;
2976 /* Enable -munaligned-access by default for
2977 - all ARMv6 architecture-based processors
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors. */
2985 if (unaligned_access == 2)
2987 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2988 unaligned_access = 1;
2989 else
2990 unaligned_access = 0;
2992 else if (unaligned_access == 1
2993 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2995 warning (0, "target CPU does not support unaligned accesses");
2996 unaligned_access = 0;
2999 if (TARGET_THUMB1 && flag_schedule_insns)
3001 /* Don't warn since it's on by default in -O2. */
3002 flag_schedule_insns = 0;
3005 if (optimize_size)
3007 /* If optimizing for size, bump the number of instructions that we
3008 are prepared to conditionally execute (even on a StrongARM). */
3009 max_insns_skipped = 6;
3011 /* For THUMB2, we limit the conditional sequence to one IT block. */
3012 if (TARGET_THUMB2)
3013 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3015 else
3016 max_insns_skipped = current_tune->max_insns_skipped;
3018 /* Hot/Cold partitioning is not currently supported, since we can't
3019 handle literal pool placement in that case. */
3020 if (flag_reorder_blocks_and_partition)
3022 inform (input_location,
3023 "-freorder-blocks-and-partition not supported on this architecture");
3024 flag_reorder_blocks_and_partition = 0;
3025 flag_reorder_blocks = 1;
3028 if (flag_pic)
3029 /* Hoisting PIC address calculations more aggressively provides a small,
3030 but measurable, size reduction for PIC code. Therefore, we decrease
3031 the bar for unrestricted expression hoisting to the cost of PIC address
3032 calculation, which is 2 instructions. */
3033 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3034 global_options.x_param_values,
3035 global_options_set.x_param_values);
3037 /* ARM EABI defaults to strict volatile bitfields. */
3038 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3039 && abi_version_at_least(2))
3040 flag_strict_volatile_bitfields = 1;
3042 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
3043 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3044 if (flag_prefetch_loop_arrays < 0
3045 && HAVE_prefetch
3046 && optimize >= 3
3047 && current_tune->num_prefetch_slots > 0)
3048 flag_prefetch_loop_arrays = 1;
3050 /* Set up parameters to be used in the prefetching algorithm. Do not override
3051 the defaults unless we are tuning for a core we have researched values for. */
3052 if (current_tune->num_prefetch_slots > 0)
3053 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3054 current_tune->num_prefetch_slots,
3055 global_options.x_param_values,
3056 global_options_set.x_param_values);
3057 if (current_tune->l1_cache_line_size >= 0)
3058 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3059 current_tune->l1_cache_line_size,
3060 global_options.x_param_values,
3061 global_options_set.x_param_values);
3062 if (current_tune->l1_cache_size >= 0)
3063 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3064 current_tune->l1_cache_size,
3065 global_options.x_param_values,
3066 global_options_set.x_param_values);
3068 /* Use Neon rather than core registers to perform 64-bit
3069 operations. */
3070 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3071 if (use_neon_for_64bits == 1)
3072 prefer_neon_for_64bits = true;
3074 /* Use the alternative scheduling-pressure algorithm by default. */
3075 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3076 global_options.x_param_values,
3077 global_options_set.x_param_values);
3079 /* Disable shrink-wrap when optimizing function for size, since it tends to
3080 generate additional returns. */
3081 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3082 flag_shrink_wrap = false;
3083 /* TBD: Dwarf info for apcs frame is not handled yet. */
3084 if (TARGET_APCS_FRAME)
3085 flag_shrink_wrap = false;
3087 /* We only support -mslow-flash-data on armv7-m targets. */
3088 if (target_slow_flash_data
3089 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3090 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3091 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3093 /* Currently, for slow flash data, we just disable literal pools. */
3094 if (target_slow_flash_data)
3095 arm_disable_literal_pool = true;
3097 /* Thumb2 inline assembly code should always use unified syntax.
3098 This will apply to ARM and Thumb1 eventually. */
3099 if (TARGET_THUMB2)
3100 inline_asm_unified = 1;
3102 /* Disable scheduling fusion by default if this is not an armv7 processor
3103 or it doesn't prefer ldrd/strd. */
3104 if (flag_schedule_fusion == 2
3105 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3106 flag_schedule_fusion = 0;
3108 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3109 - epilogue_insns - does not accurately model the corresponding insns
3110 emitted in the asm file. In particular, see the comment in thumb_exit
3111 'Find out how many of the (return) argument registers we can corrupt'.
3112 As a consequence, the epilogue may clobber registers without
3113 fuse-caller-save finding out about it. Therefore, disable fuse-caller-save
3114 in Thumb1 mode.
3115 TODO: Accurately model clobbers for epilogue_insns and reenable
3116 fuse-caller-save. */
3117 if (TARGET_THUMB1)
3118 flag_use_caller_save = 0;
3120 /* Register global variables with the garbage collector. */
3121 arm_add_gc_roots ();
3124 static void
3125 arm_add_gc_roots (void)
3127 gcc_obstack_init(&minipool_obstack);
3128 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3131 /* A table of known ARM exception types.
3132 For use with the interrupt function attribute. */
3134 typedef struct
3136 const char *const arg;
3137 const unsigned long return_value;
3139 isr_attribute_arg;
3141 static const isr_attribute_arg isr_attribute_args [] =
3143 { "IRQ", ARM_FT_ISR },
3144 { "irq", ARM_FT_ISR },
3145 { "FIQ", ARM_FT_FIQ },
3146 { "fiq", ARM_FT_FIQ },
3147 { "ABORT", ARM_FT_ISR },
3148 { "abort", ARM_FT_ISR },
3149 { "ABORT", ARM_FT_ISR },
3150 { "abort", ARM_FT_ISR },
3151 { "UNDEF", ARM_FT_EXCEPTION },
3152 { "undef", ARM_FT_EXCEPTION },
3153 { "SWI", ARM_FT_EXCEPTION },
3154 { "swi", ARM_FT_EXCEPTION },
3155 { NULL, ARM_FT_NORMAL }
3158 /* Returns the (interrupt) function type of the current
3159 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3161 static unsigned long
3162 arm_isr_value (tree argument)
3164 const isr_attribute_arg * ptr;
3165 const char * arg;
3167 if (!arm_arch_notm)
3168 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3170 /* No argument - default to IRQ. */
3171 if (argument == NULL_TREE)
3172 return ARM_FT_ISR;
3174 /* Get the value of the argument. */
3175 if (TREE_VALUE (argument) == NULL_TREE
3176 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3177 return ARM_FT_UNKNOWN;
3179 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3181 /* Check it against the list of known arguments. */
3182 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3183 if (streq (arg, ptr->arg))
3184 return ptr->return_value;
3186 /* An unrecognized interrupt type. */
3187 return ARM_FT_UNKNOWN;
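/* The table above corresponds to source-level attribute arguments such as

     void uart_rx (void) __attribute__ ((interrupt ("IRQ")));
     void data_abort (void) __attribute__ ((isr ("ABORT")));

   an unrecognized string (or a non-string argument) yields the
   ARM_FT_UNKNOWN return above.  */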
3190 /* Computes the type of the current function. */
3192 static unsigned long
3193 arm_compute_func_type (void)
3195 unsigned long type = ARM_FT_UNKNOWN;
3196 tree a;
3197 tree attr;
3199 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3201 /* Decide if the current function is volatile. Such functions
3202 never return, and many memory cycles can be saved by not storing
3203 register values that will never be needed again. This optimization
3204 was added to speed up context switching in a kernel application. */
3205 if (optimize > 0
3206 && (TREE_NOTHROW (current_function_decl)
3207 || !(flag_unwind_tables
3208 || (flag_exceptions
3209 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3210 && TREE_THIS_VOLATILE (current_function_decl))
3211 type |= ARM_FT_VOLATILE;
3213 if (cfun->static_chain_decl != NULL)
3214 type |= ARM_FT_NESTED;
3216 attr = DECL_ATTRIBUTES (current_function_decl);
3218 a = lookup_attribute ("naked", attr);
3219 if (a != NULL_TREE)
3220 type |= ARM_FT_NAKED;
3222 a = lookup_attribute ("isr", attr);
3223 if (a == NULL_TREE)
3224 a = lookup_attribute ("interrupt", attr);
3226 if (a == NULL_TREE)
3227 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3228 else
3229 type |= arm_isr_value (TREE_VALUE (a));
3231 return type;
3234 /* Returns the type of the current function. */
3236 unsigned long
3237 arm_current_func_type (void)
3239 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3240 cfun->machine->func_type = arm_compute_func_type ();
3242 return cfun->machine->func_type;
3245 bool
3246 arm_allocate_stack_slots_for_args (void)
3248 /* Naked functions should not allocate stack slots for arguments. */
3249 return !IS_NAKED (arm_current_func_type ());
3252 static bool
3253 arm_warn_func_return (tree decl)
3255 /* Naked functions are implemented entirely in assembly, including the
3256 return sequence, so suppress warnings about this. */
3257 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
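/* A naked function is declared as, for example,

     void reset_handler (void) __attribute__ ((naked));

   GCC emits no prologue or epilogue for it, so its body is expected to be
   essentially all inline assembly; a missing-return warning would just be
   noise, which is what the check above suppresses.  */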
3261 /* Output assembler code for a block containing the constant parts
3262 of a trampoline, leaving space for the variable parts.
3264 On the ARM, (if r8 is the static chain regnum, and remembering that
3265 referencing pc adds an offset of 8) the trampoline looks like:
3266 ldr r8, [pc, #0]
3267 ldr pc, [pc]
3268 .word static chain value
3269 .word function's address
3270 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3272 static void
3273 arm_asm_trampoline_template (FILE *f)
3275 if (TARGET_ARM)
3277 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3278 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3280 else if (TARGET_THUMB2)
3282 /* The Thumb-2 trampoline is similar to the arm implementation.
3283 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3284 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3285 STATIC_CHAIN_REGNUM, PC_REGNUM);
3286 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3288 else
3290 ASM_OUTPUT_ALIGN (f, 2);
3291 fprintf (f, "\t.code\t16\n");
3292 fprintf (f, ".Ltrampoline_start:\n");
3293 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3294 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3295 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3296 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3297 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3298 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3300 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3301 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3304 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3306 static void
3307 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3309 rtx fnaddr, mem, a_tramp;
3311 emit_block_move (m_tramp, assemble_trampoline_template (),
3312 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3314 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3315 emit_move_insn (mem, chain_value);
3317 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3318 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3319 emit_move_insn (mem, fnaddr);
3321 a_tramp = XEXP (m_tramp, 0);
3322 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3323 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3324 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3327 /* Thumb trampolines should be entered in thumb mode, so set
3328 the bottom bit of the address. */
3330 static rtx
3331 arm_trampoline_adjust_address (rtx addr)
3333 if (TARGET_THUMB)
3334 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3335 NULL, 0, OPTAB_LIB_WIDEN);
3336 return addr;
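/* On Thumb targets the trampoline template is assembled as Thumb code,
   and an indirect branch (BX/BLX) to an address with bit 0 set switches
   the core to Thumb state, which is why the address is OR-ed with 1
   above.  Illustratively, a call through a nested-function pointer ends
   up as something like

     blx  r3    @ r3 holds the trampoline address with bit 0 already set

   (the register choice here is purely illustrative).  */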
3339 /* Return 1 if it is possible to return using a single instruction.
3340 If SIBLING is non-null, this is a test for a return before a sibling
3341 call. SIBLING is the call insn, so we can examine its register usage. */
3344 use_return_insn (int iscond, rtx sibling)
3346 int regno;
3347 unsigned int func_type;
3348 unsigned long saved_int_regs;
3349 unsigned HOST_WIDE_INT stack_adjust;
3350 arm_stack_offsets *offsets;
3352 /* Never use a return instruction before reload has run. */
3353 if (!reload_completed)
3354 return 0;
3356 func_type = arm_current_func_type ();
3358 /* Naked, volatile and stack alignment functions need special
3359 consideration. */
3360 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3361 return 0;
3363 /* So do interrupt functions that use the frame pointer and Thumb
3364 interrupt functions. */
3365 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3366 return 0;
3368 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3369 && !optimize_function_for_size_p (cfun))
3370 return 0;
3372 offsets = arm_get_frame_offsets ();
3373 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3375 /* As do variadic functions. */
3376 if (crtl->args.pretend_args_size
3377 || cfun->machine->uses_anonymous_args
3378 /* Or if the function calls __builtin_eh_return () */
3379 || crtl->calls_eh_return
3380 /* Or if the function calls alloca */
3381 || cfun->calls_alloca
3382 /* Or if there is a stack adjustment. However, if the stack pointer
3383 is saved on the stack, we can use a pre-incrementing stack load. */
3384 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3385 && stack_adjust == 4)))
3386 return 0;
3388 saved_int_regs = offsets->saved_regs_mask;
3390 /* Unfortunately, the insn
3392 ldmib sp, {..., sp, ...}
3394 triggers a bug on most SA-110 based devices, such that the stack
3395 pointer won't be correctly restored if the instruction takes a
3396 page fault. We work around this problem by popping r3 along with
3397 the other registers, since that is never slower than executing
3398 another instruction.
3400 We test for !arm_arch5 here, because code for any architecture
3401 less than this could potentially be run on one of the buggy
3402 chips. */
3403 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3405 /* Validate that r3 is a call-clobbered register (always true in
3406 the default abi) ... */
3407 if (!call_used_regs[3])
3408 return 0;
3410 /* ... that it isn't being used for a return value ... */
3411 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3412 return 0;
3414 /* ... or for a tail-call argument ... */
3415 if (sibling)
3417 gcc_assert (CALL_P (sibling));
3419 if (find_regno_fusage (sibling, USE, 3))
3420 return 0;
3423 /* ... and that there are no call-saved registers in r0-r2
3424 (always true in the default ABI). */
3425 if (saved_int_regs & 0x7)
3426 return 0;
3429 /* Can't be done if interworking with Thumb, and any registers have been
3430 stacked. */
3431 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3432 return 0;
3434 /* On StrongARM, conditional returns are expensive if they aren't
3435 taken and multiple registers have been stacked. */
3436 if (iscond && arm_tune_strongarm)
3438 /* Conditional return when just the LR is stored is a simple
3439 conditional-load instruction, that's not expensive. */
3440 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3441 return 0;
3443 if (flag_pic
3444 && arm_pic_register != INVALID_REGNUM
3445 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3446 return 0;
3449 /* If there are saved registers but the LR isn't saved, then we need
3450 two instructions for the return. */
3451 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3452 return 0;
3454 /* Can't be done if any of the VFP regs are pushed,
3455 since this also requires an insn. */
3456 if (TARGET_HARD_FLOAT && TARGET_VFP)
3457 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3458 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3459 return 0;
3461 if (TARGET_REALLY_IWMMXT)
3462 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3463 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3464 return 0;
3466 return 1;
3469 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3470 shrink-wrapping if possible. This is the case if we need to emit a
3471 prologue, which we can test by looking at the offsets. */
3472 bool
3473 use_simple_return_p (void)
3475 arm_stack_offsets *offsets;
3477 offsets = arm_get_frame_offsets ();
3478 return offsets->outgoing_args != 0;
3481 /* Return TRUE if int I is a valid immediate ARM constant. */
3484 const_ok_for_arm (HOST_WIDE_INT i)
3486 int lowbit;
3488 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3489 be all zero, or all one. */
3490 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3491 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3492 != ((~(unsigned HOST_WIDE_INT) 0)
3493 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3494 return FALSE;
3496 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3498 /* Fast return for 0 and small values. We must do this for zero, since
3499 the code below can't handle that one case. */
3500 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3501 return TRUE;
3503 /* Get the number of trailing zeros. */
3504 lowbit = ffs((int) i) - 1;
3506 /* Only even shifts are allowed in ARM mode so round down to the
3507 nearest even number. */
3508 if (TARGET_ARM)
3509 lowbit &= ~1;
3511 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3512 return TRUE;
3514 if (TARGET_ARM)
3516 /* Allow rotated constants in ARM mode. */
3517 if (lowbit <= 4
3518 && ((i & ~0xc000003f) == 0
3519 || (i & ~0xf000000f) == 0
3520 || (i & ~0xfc000003) == 0))
3521 return TRUE;
3523 else
3525 HOST_WIDE_INT v;
3527 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3528 v = i & 0xff;
3529 v |= v << 16;
3530 if (i == v || i == (v | (v << 8)))
3531 return TRUE;
3533 /* Allow repeated pattern 0xXY00XY00. */
3534 v = i & 0xff00;
3535 v |= v << 16;
3536 if (i == v)
3537 return TRUE;
3540 return FALSE;
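/* A few hand-worked examples of the checks above (not exhaustive):
     0x000000ff -> TRUE  (fits in the low eight bits)
     0xff000000 -> TRUE  (0xff at an even rotation; lowbit == 24)
     0x000003fc -> TRUE  (0xff << 2)
     0x000001fe -> FALSE for ARM (would need an odd rotation), TRUE for
                   Thumb-2 (an 8-bit value at an arbitrary shift)
     0x00ff00ff -> FALSE for ARM, TRUE for Thumb-2 (0x00XY00XY pattern).  */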
3543 /* Return true if I is a valid constant for the operation CODE. */
3545 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3547 if (const_ok_for_arm (i))
3548 return 1;
3550 switch (code)
3552 case SET:
3553 /* See if we can use movw. */
3554 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3555 return 1;
3556 else
3557 /* Otherwise, try mvn. */
3558 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3560 case PLUS:
3561 /* See if we can use addw or subw. */
3562 if (TARGET_THUMB2
3563 && ((i & 0xfffff000) == 0
3564 || ((-i) & 0xfffff000) == 0))
3565 return 1;
3566 /* else fall through. */
3568 case COMPARE:
3569 case EQ:
3570 case NE:
3571 case GT:
3572 case LE:
3573 case LT:
3574 case GE:
3575 case GEU:
3576 case LTU:
3577 case GTU:
3578 case LEU:
3579 case UNORDERED:
3580 case ORDERED:
3581 case UNEQ:
3582 case UNGE:
3583 case UNLT:
3584 case UNGT:
3585 case UNLE:
3586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3589 case XOR:
3590 return 0;
3592 case IOR:
3593 if (TARGET_THUMB2)
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595 return 0;
3597 case AND:
3598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3600 default:
3601 gcc_unreachable ();
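/* Examples, worked by hand against the cases above: with CODE == SET,
   0xffffff00 is accepted because its complement 0x000000ff is a valid
   immediate (an MVN can load it), and 0x0000abcd is accepted via MOVW
   on targets that have it.  With CODE == PLUS on Thumb-2, 0x00000fff
   is accepted because ADDW/SUBW take a 12-bit immediate.  */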
3605 /* Return true if I is a valid di mode constant for the operation CODE. */
3607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3611 rtx hi = GEN_INT (hi_val);
3612 rtx lo = GEN_INT (lo_val);
3614 if (TARGET_THUMB1)
3615 return 0;
3617 switch (code)
3619 case AND:
3620 case IOR:
3621 case XOR:
3622 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3623 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3624 case PLUS:
3625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3627 default:
3628 return 0;
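/* Example: a DImode AND with 0xff000000ff000000 is accepted, since each
   32-bit half (0xff000000) is itself a valid immediate for AND, so the
   split DImode operation needs only two SImode instructions.  */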
3632 /* Emit a sequence of insns to handle a large constant.
3633 CODE is the code of the operation required, it can be any of SET, PLUS,
3634 IOR, AND, XOR, MINUS;
3635 MODE is the mode in which the operation is being performed;
3636 VAL is the integer to operate on;
3637 SOURCE is the other operand (a register, or a null-pointer for SET);
3638 SUBTARGETS means it is safe to create scratch registers if that will
3639 either produce a simpler sequence, or we will want to cse the values.
3640 Return value is the number of insns emitted. */
3642 /* ??? Tweak this for thumb2. */
3644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3647 rtx cond;
3649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3650 cond = COND_EXEC_TEST (PATTERN (insn));
3651 else
3652 cond = NULL_RTX;
3654 if (subtargets || code == SET
3655 || (REG_P (target) && REG_P (source)
3656 && REGNO (target) != REGNO (source)))
3658 /* After arm_reorg has been called, we can't fix up expensive
3659 constants by pushing them into memory so we must synthesize
3660 them in-line, regardless of the cost. This is only likely to
3661 be more costly on chips that have load delay slots and we are
3662 compiling without running the scheduler (so no splitting
3663 occurred before the final instruction emission).
3665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3667 if (!cfun->machine->after_arm_reorg
3668 && !cond
3669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3670 1, 0)
3671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3672 + (code != SET))))
3674 if (code == SET)
3676 /* Currently SET is the only monadic value for CODE; all
3677 the rest are dyadic. */
3678 if (TARGET_USE_MOVT)
3679 arm_emit_movpair (target, GEN_INT (val));
3680 else
3681 emit_set_insn (target, GEN_INT (val));
3683 return 1;
3685 else
3687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3689 if (TARGET_USE_MOVT)
3690 arm_emit_movpair (temp, GEN_INT (val));
3691 else
3692 emit_set_insn (temp, GEN_INT (val));
3694 /* For MINUS, the constant is the minuend (SOURCE is subtracted
3695 from it), since we never have subtraction of a constant. */
3696 if (code == MINUS)
3697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3698 else
3699 emit_set_insn (target,
3700 gen_rtx_fmt_ee (code, mode, source, temp));
3701 return 2;
3706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3711 ARM/THUMB2 immediates and add up to VAL.
3712 The function return value gives the number of insns required. */
3713 static int
3714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3715 struct four_ints *return_sequence)
3717 int best_consecutive_zeros = 0;
3718 int i;
3719 int best_start = 0;
3720 int insns1, insns2;
3721 struct four_ints tmp_sequence;
3723 /* If we aren't targeting ARM, the best place to start is always at
3724 the bottom, otherwise look more closely. */
3725 if (TARGET_ARM)
3727 for (i = 0; i < 32; i += 2)
3729 int consecutive_zeros = 0;
3731 if (!(val & (3 << i)))
3733 while ((i < 32) && !(val & (3 << i)))
3735 consecutive_zeros += 2;
3736 i += 2;
3738 if (consecutive_zeros > best_consecutive_zeros)
3740 best_consecutive_zeros = consecutive_zeros;
3741 best_start = i - consecutive_zeros;
3743 i -= 2;
3748 /* So long as it won't require any more insns to do so, it's
3749 desirable to emit a small constant (in bits 0...9) in the last
3750 insn. This way there is more chance that it can be combined with
3751 a later addressing insn to form a pre-indexed load or store
3752 operation. Consider:
3754 *((volatile int *)0xe0000100) = 1;
3755 *((volatile int *)0xe0000110) = 2;
3757 We want this to wind up as:
3759 mov rA, #0xe0000000
3760 mov rB, #1
3761 str rB, [rA, #0x100]
3762 mov rB, #2
3763 str rB, [rA, #0x110]
3765 rather than having to synthesize both large constants from scratch.
3767 Therefore, we calculate how many insns would be required to emit
3768 the constant starting from `best_start', and also starting from
3769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3770 yield a shorter sequence, we may as well use zero. */
3771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3772 if (best_start != 0
3773 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3776 if (insns2 <= insns1)
3778 *return_sequence = tmp_sequence;
3779 insns1 = insns2;
3783 return insns1;
3786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3787 static int
3788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3789 struct four_ints *return_sequence, int i)
3791 int remainder = val & 0xffffffff;
3792 int insns = 0;
3794 /* Try and find a way of doing the job in either two or three
3795 instructions.
3797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3798 location. We start at position I. This may be the MSB, or
3799 optimal_immediate_sequence may have positioned it at the largest block
3800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3801 wrapping around to the top of the word when we drop off the bottom.
3802 In the worst case this code should produce no more than four insns.
3804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3805 constants, shifted to any arbitrary location. We should always start
3806 at the MSB. */
3809 int end;
3810 unsigned int b1, b2, b3, b4;
3811 unsigned HOST_WIDE_INT result;
3812 int loc;
3814 gcc_assert (insns < 4);
3816 if (i <= 0)
3817 i += 32;
3819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3822 loc = i;
3823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3824 /* We can use addw/subw for the last 12 bits. */
3825 result = remainder;
3826 else
3828 /* Use an 8-bit shifted/rotated immediate. */
3829 end = i - 8;
3830 if (end < 0)
3831 end += 32;
3832 result = remainder & ((0x0ff << end)
3833 | ((i < end) ? (0xff >> (32 - end))
3834 : 0));
3835 i -= 8;
3838 else
3840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3841 arbitrary shifts. */
3842 i -= TARGET_ARM ? 2 : 1;
3843 continue;
3846 /* Next, see if we can do a better job with a thumb2 replicated
3847 constant.
3849 We do it this way around to catch the cases like 0x01F001E0 where
3850 two 8-bit immediates would work, but a replicated constant would
3851 make it worse.
3853 TODO: 16-bit constants that don't clear all the bits, but still win.
3854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3855 if (TARGET_THUMB2)
3857 b1 = (remainder & 0xff000000) >> 24;
3858 b2 = (remainder & 0x00ff0000) >> 16;
3859 b3 = (remainder & 0x0000ff00) >> 8;
3860 b4 = remainder & 0xff;
3862 if (loc > 24)
3864 /* The 8-bit immediate already found clears b1 (and maybe b2),
3865 but must leave b3 and b4 alone. */
3867 /* First try to find a 32-bit replicated constant that clears
3868 almost everything. We can assume that we can't do it in one,
3869 or else we wouldn't be here. */
3870 unsigned int tmp = b1 & b2 & b3 & b4;
3871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3872 + (tmp << 24);
3873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3874 + (tmp == b3) + (tmp == b4);
3875 if (tmp
3876 && (matching_bytes >= 3
3877 || (matching_bytes == 2
3878 && const_ok_for_op (remainder & ~tmp2, code))))
3880 /* At least 3 of the bytes match, and the fourth has at
3881 least as many bits set, or two of the bytes match
3882 and it will only require one more insn to finish. */
3883 result = tmp2;
3884 i = tmp != b1 ? 32
3885 : tmp != b2 ? 24
3886 : tmp != b3 ? 16
3887 : 8;
3890 /* Second, try to find a 16-bit replicated constant that can
3891 leave three of the bytes clear. If b2 or b4 is already
3892 zero, then we can. If the 8-bit from above would not
3893 clear b2 anyway, then we still win. */
3894 else if (b1 == b3 && (!b2 || !b4
3895 || (remainder & 0x00ff0000 & ~result)))
3897 result = remainder & 0xff00ff00;
3898 i = 24;
3901 else if (loc > 16)
3903 /* The 8-bit immediate already found clears b2 (and maybe b3)
3904 and we don't get here unless b1 is already clear, but it will
3905 leave b4 unchanged. */
3907 /* If we can clear b2 and b4 at once, then we win, since the
3908 8-bits couldn't possibly reach that far. */
3909 if (b2 == b4)
3911 result = remainder & 0x00ff00ff;
3912 i = 16;
3917 return_sequence->i[insns++] = result;
3918 remainder &= ~result;
3920 if (code == SET || code == MINUS)
3921 code = PLUS;
3923 while (remainder);
3925 return insns;
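/* For a "worst case" ARM-mode constant such as 0x12345678 the loop above
   typically emits four 8-bit pieces (a MOV followed by three ORRs when
   CODE is SET); the assertion above guarantees the sequence never
   exceeds four insns.  */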
3928 /* Emit an instruction with the indicated PATTERN. If COND is
3929 non-NULL, conditionalize the execution of the instruction on COND
3930 being true. */
3932 static void
3933 emit_constant_insn (rtx cond, rtx pattern)
3935 if (cond)
3936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3937 emit_insn (pattern);
3940 /* As above, but extra parameter GENERATE which, if clear, suppresses
3941 RTL generation. */
3943 static int
3944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3945 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3946 int generate)
3948 int can_invert = 0;
3949 int can_negate = 0;
3950 int final_invert = 0;
3951 int i;
3952 int set_sign_bit_copies = 0;
3953 int clear_sign_bit_copies = 0;
3954 int clear_zero_bit_copies = 0;
3955 int set_zero_bit_copies = 0;
3956 int insns = 0, neg_insns, inv_insns;
3957 unsigned HOST_WIDE_INT temp1, temp2;
3958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3959 struct four_ints *immediates;
3960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3962 /* Find out which operations are safe for a given CODE. Also do a quick
3963 check for degenerate cases; these can occur when DImode operations
3964 are split. */
3965 switch (code)
3967 case SET:
3968 can_invert = 1;
3969 break;
3971 case PLUS:
3972 can_negate = 1;
3973 break;
3975 case IOR:
3976 if (remainder == 0xffffffff)
3978 if (generate)
3979 emit_constant_insn (cond,
3980 gen_rtx_SET (VOIDmode, target,
3981 GEN_INT (ARM_SIGN_EXTEND (val))));
3982 return 1;
3985 if (remainder == 0)
3987 if (reload_completed && rtx_equal_p (target, source))
3988 return 0;
3990 if (generate)
3991 emit_constant_insn (cond,
3992 gen_rtx_SET (VOIDmode, target, source));
3993 return 1;
3995 break;
3997 case AND:
3998 if (remainder == 0)
4000 if (generate)
4001 emit_constant_insn (cond,
4002 gen_rtx_SET (VOIDmode, target, const0_rtx));
4003 return 1;
4005 if (remainder == 0xffffffff)
4007 if (reload_completed && rtx_equal_p (target, source))
4008 return 0;
4009 if (generate)
4010 emit_constant_insn (cond,
4011 gen_rtx_SET (VOIDmode, target, source));
4012 return 1;
4014 can_invert = 1;
4015 break;
4017 case XOR:
4018 if (remainder == 0)
4020 if (reload_completed && rtx_equal_p (target, source))
4021 return 0;
4022 if (generate)
4023 emit_constant_insn (cond,
4024 gen_rtx_SET (VOIDmode, target, source));
4025 return 1;
4028 if (remainder == 0xffffffff)
4030 if (generate)
4031 emit_constant_insn (cond,
4032 gen_rtx_SET (VOIDmode, target,
4033 gen_rtx_NOT (mode, source)));
4034 return 1;
4036 final_invert = 1;
4037 break;
4039 case MINUS:
4040 /* We treat MINUS as (val - source), since (source - val) is always
4041 passed as (source + (-val)). */
4042 if (remainder == 0)
4044 if (generate)
4045 emit_constant_insn (cond,
4046 gen_rtx_SET (VOIDmode, target,
4047 gen_rtx_NEG (mode, source)));
4048 return 1;
4050 if (const_ok_for_arm (val))
4052 if (generate)
4053 emit_constant_insn (cond,
4054 gen_rtx_SET (VOIDmode, target,
4055 gen_rtx_MINUS (mode, GEN_INT (val),
4056 source)));
4057 return 1;
4060 break;
4062 default:
4063 gcc_unreachable ();
4066 /* If we can do it in one insn get out quickly. */
4067 if (const_ok_for_op (val, code))
4069 if (generate)
4070 emit_constant_insn (cond,
4071 gen_rtx_SET (VOIDmode, target,
4072 (source
4073 ? gen_rtx_fmt_ee (code, mode, source,
4074 GEN_INT (val))
4075 : GEN_INT (val))));
4076 return 1;
4079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4080 insn. */
4081 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4082 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4084 if (generate)
4086 if (mode == SImode && i == 16)
4087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4088 smaller insn. */
4089 emit_constant_insn (cond,
4090 gen_zero_extendhisi2
4091 (target, gen_lowpart (HImode, source)));
4092 else
4093 /* Extz only supports SImode, but we can coerce the operands
4094 into that mode. */
4095 emit_constant_insn (cond,
4096 gen_extzv_t2 (gen_lowpart (SImode, target),
4097 gen_lowpart (SImode, source),
4098 GEN_INT (i), const0_rtx));
4101 return 1;
4104 /* Calculate a few attributes that may be useful for specific
4105 optimizations. */
4106 /* Count number of leading zeros. */
4107 for (i = 31; i >= 0; i--)
4109 if ((remainder & (1 << i)) == 0)
4110 clear_sign_bit_copies++;
4111 else
4112 break;
4115 /* Count number of leading 1's. */
4116 for (i = 31; i >= 0; i--)
4118 if ((remainder & (1 << i)) != 0)
4119 set_sign_bit_copies++;
4120 else
4121 break;
4124 /* Count number of trailing zeros. */
4125 for (i = 0; i <= 31; i++)
4127 if ((remainder & (1 << i)) == 0)
4128 clear_zero_bit_copies++;
4129 else
4130 break;
4133 /* Count number of trailing 1's. */
4134 for (i = 0; i <= 31; i++)
4136 if ((remainder & (1 << i)) != 0)
4137 set_zero_bit_copies++;
4138 else
4139 break;
4142 switch (code)
4144 case SET:
4145 /* See if we can do this by sign_extending a constant that is known
4146 to be negative. This is a good way of doing it, since the shift
4147 may well merge into a subsequent insn. */
4148 if (set_sign_bit_copies > 1)
4150 if (const_ok_for_arm
4151 (temp1 = ARM_SIGN_EXTEND (remainder
4152 << (set_sign_bit_copies - 1))))
4154 if (generate)
4156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4157 emit_constant_insn (cond,
4158 gen_rtx_SET (VOIDmode, new_src,
4159 GEN_INT (temp1)));
4160 emit_constant_insn (cond,
4161 gen_ashrsi3 (target, new_src,
4162 GEN_INT (set_sign_bit_copies - 1)));
4164 return 2;
4166 /* For an inverted constant, we will need to set the low bits;
4167 these will be shifted out of harm's way. */
4168 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4169 if (const_ok_for_arm (~temp1))
4171 if (generate)
4173 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4174 emit_constant_insn (cond,
4175 gen_rtx_SET (VOIDmode, new_src,
4176 GEN_INT (temp1)));
4177 emit_constant_insn (cond,
4178 gen_ashrsi3 (target, new_src,
4179 GEN_INT (set_sign_bit_copies - 1)));
4181 return 2;
4185 /* See if we can calculate the value as the difference between two
4186 valid immediates. */
4187 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4189 int topshift = clear_sign_bit_copies & ~1;
4191 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4192 & (0xff000000 >> topshift));
4194 /* If temp1 is zero, then that means the 9 most significant
4195 bits of remainder were 1 and we've caused it to overflow.
4196 When topshift is 0 we don't need to do anything since we
4197 can borrow from 'bit 32'. */
4198 if (temp1 == 0 && topshift != 0)
4199 temp1 = 0x80000000 >> (topshift - 1);
4201 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4203 if (const_ok_for_arm (temp2))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_addsi3 (target, new_src,
4213 GEN_INT (-temp2)));
4216 return 2;
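/* Hand-worked example of the difference trick above: a SET of 0x00fffff0
   (neither it nor its complement is a valid immediate) gives
   temp1 == 0x01000000 and temp2 == 0x10, i.e. roughly

	mov	rT, #0x01000000
	sub	rD, rT, #16

   two instructions rather than the three needed to build it bytewise.  */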
4220 /* See if we can generate this by setting the bottom (or the top)
4221 16 bits, and then shifting these into the other half of the
4222 word. We only look for the simplest cases; to do more would cost
4223 too much. Be careful, however, not to generate this when the
4224 alternative would take fewer insns. */
4225 if (val & 0xffff0000)
4227 temp1 = remainder & 0xffff0000;
4228 temp2 = remainder & 0x0000ffff;
4230 /* Overlaps outside this range are best done using other methods. */
4231 for (i = 9; i < 24; i++)
4233 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4234 && !const_ok_for_arm (temp2))
4236 rtx new_src = (subtargets
4237 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4238 : target);
4239 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4240 source, subtargets, generate);
4241 source = new_src;
4242 if (generate)
4243 emit_constant_insn
4244 (cond,
4245 gen_rtx_SET
4246 (VOIDmode, target,
4247 gen_rtx_IOR (mode,
4248 gen_rtx_ASHIFT (mode, source,
4249 GEN_INT (i)),
4250 source)));
4251 return insns + 1;
4255 /* Don't duplicate cases already considered. */
4256 for (i = 17; i < 24; i++)
4258 if (((temp1 | (temp1 >> i)) == remainder)
4259 && !const_ok_for_arm (temp1))
4261 rtx new_src = (subtargets
4262 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4263 : target);
4264 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4265 source, subtargets, generate);
4266 source = new_src;
4267 if (generate)
4268 emit_constant_insn
4269 (cond,
4270 gen_rtx_SET (VOIDmode, target,
4271 gen_rtx_IOR
4272 (mode,
4273 gen_rtx_LSHIFTRT (mode, source,
4274 GEN_INT (i)),
4275 source)));
4276 return insns + 1;
4280 break;
4282 case IOR:
4283 case XOR:
4284 /* If we have IOR or XOR, and the constant can be loaded in a
4285 single instruction, and we can find a temporary to put it in,
4286 then this can be done in two instructions instead of 3-4. */
4287 if (subtargets
4288 /* TARGET can't be NULL if SUBTARGETS is 0 */
4289 || (reload_completed && !reg_mentioned_p (target, source)))
4291 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4293 if (generate)
4295 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4297 emit_constant_insn (cond,
4298 gen_rtx_SET (VOIDmode, sub,
4299 GEN_INT (val)));
4300 emit_constant_insn (cond,
4301 gen_rtx_SET (VOIDmode, target,
4302 gen_rtx_fmt_ee (code, mode,
4303 source, sub)));
4305 return 2;
4309 if (code == XOR)
4310 break;
4312 /* Convert
4313 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4314 followed by 0s, e.g. 0xfff00000) into
4315 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4317 This can be done in 2 instructions by using shifts with mov or mvn.
4318 e.g. for
4319 x = x | 0xfff00000;
4320 we generate.
4321 mvn r0, r0, asl #12
4322 mvn r0, r0, lsr #12 */
4323 if (set_sign_bit_copies > 8
4324 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4326 if (generate)
4328 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4329 rtx shift = GEN_INT (set_sign_bit_copies);
4331 emit_constant_insn
4332 (cond,
4333 gen_rtx_SET (VOIDmode, sub,
4334 gen_rtx_NOT (mode,
4335 gen_rtx_ASHIFT (mode,
4336 source,
4337 shift))));
4338 emit_constant_insn
4339 (cond,
4340 gen_rtx_SET (VOIDmode, target,
4341 gen_rtx_NOT (mode,
4342 gen_rtx_LSHIFTRT (mode, sub,
4343 shift))));
4345 return 2;
4348 /* Convert
4349 x = y | constant (which has set_zero_bit_copies trailing ones)
4351 into x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4353 E.g. for r0 = r0 | 0xfff we generate
4354 mvn r0, r0, lsr #12
4355 mvn r0, r0, asl #12
4358 if (set_zero_bit_copies > 8
4359 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4361 if (generate)
4363 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4364 rtx shift = GEN_INT (set_zero_bit_copies);
4366 emit_constant_insn
4367 (cond,
4368 gen_rtx_SET (VOIDmode, sub,
4369 gen_rtx_NOT (mode,
4370 gen_rtx_LSHIFTRT (mode,
4371 source,
4372 shift))));
4373 emit_constant_insn
4374 (cond,
4375 gen_rtx_SET (VOIDmode, target,
4376 gen_rtx_NOT (mode,
4377 gen_rtx_ASHIFT (mode, sub,
4378 shift))));
4380 return 2;
4383 /* This will never be reached for Thumb2 because orn is a valid
4384 instruction. This is for Thumb1 and the ARM 32 bit cases.
4386 x = y | constant (such that ~constant is a valid constant)
4387 Transform this to
4388 x = ~(~y & ~constant).
4390 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4392 if (generate)
4394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4395 emit_constant_insn (cond,
4396 gen_rtx_SET (VOIDmode, sub,
4397 gen_rtx_NOT (mode, source)));
4398 source = sub;
4399 if (subtargets)
4400 sub = gen_reg_rtx (mode);
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, sub,
4403 gen_rtx_AND (mode, source,
4404 GEN_INT (temp1))));
4405 emit_constant_insn (cond,
4406 gen_rtx_SET (VOIDmode, target,
4407 gen_rtx_NOT (mode, sub)));
4409 return 3;
4411 break;
4413 case AND:
4414 /* See if two shifts will do 2 or more insns' worth of work. */
4415 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4417 HOST_WIDE_INT shift_mask = ((0xffffffff
4418 << (32 - clear_sign_bit_copies))
4419 & 0xffffffff);
4421 if ((remainder | shift_mask) != 0xffffffff)
4423 if (generate)
4425 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4426 insns = arm_gen_constant (AND, mode, cond,
4427 remainder | shift_mask,
4428 new_src, source, subtargets, 1);
4429 source = new_src;
4431 else
4433 rtx targ = subtargets ? NULL_RTX : target;
4434 insns = arm_gen_constant (AND, mode, cond,
4435 remainder | shift_mask,
4436 targ, source, subtargets, 0);
4440 if (generate)
4442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4443 rtx shift = GEN_INT (clear_sign_bit_copies);
4445 emit_insn (gen_ashlsi3 (new_src, source, shift));
4446 emit_insn (gen_lshrsi3 (target, new_src, shift));
4449 return insns + 2;
4452 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4454 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4456 if ((remainder | shift_mask) != 0xffffffff)
4458 if (generate)
4460 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4462 insns = arm_gen_constant (AND, mode, cond,
4463 remainder | shift_mask,
4464 new_src, source, subtargets, 1);
4465 source = new_src;
4467 else
4469 rtx targ = subtargets ? NULL_RTX : target;
4471 insns = arm_gen_constant (AND, mode, cond,
4472 remainder | shift_mask,
4473 targ, source, subtargets, 0);
4477 if (generate)
4479 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (clear_zero_bit_copies);
4482 emit_insn (gen_lshrsi3 (new_src, source, shift));
4483 emit_insn (gen_ashlsi3 (target, new_src, shift));
4486 return insns + 2;
4489 break;
4491 default:
4492 break;
4495 /* Calculate what the instruction sequences would be if we generated it
4496 normally, negated, or inverted. */
4497 if (code == AND)
4498 /* AND cannot be split into multiple insns, so invert and use BIC. */
4499 insns = 99;
4500 else
4501 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4503 if (can_negate)
4504 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4505 &neg_immediates);
4506 else
4507 neg_insns = 99;
4509 if (can_invert || final_invert)
4510 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4511 &inv_immediates);
4512 else
4513 inv_insns = 99;
4515 immediates = &pos_immediates;
4517 /* Is the negated immediate sequence more efficient? */
4518 if (neg_insns < insns && neg_insns <= inv_insns)
4520 insns = neg_insns;
4521 immediates = &neg_immediates;
4523 else
4524 can_negate = 0;
4526 /* Is the inverted immediate sequence more efficient?
4527 We must allow for an extra NOT instruction for XOR operations, although
4528 there is some chance that the final 'mvn' will get optimized later. */
4529 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4531 insns = inv_insns;
4532 immediates = &inv_immediates;
4534 else
4536 can_invert = 0;
4537 final_invert = 0;
4540 /* Now output the chosen sequence as instructions. */
4541 if (generate)
4543 for (i = 0; i < insns; i++)
4545 rtx new_src, temp1_rtx;
4547 temp1 = immediates->i[i];
4549 if (code == SET || code == MINUS)
4550 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4551 else if ((final_invert || i < (insns - 1)) && subtargets)
4552 new_src = gen_reg_rtx (mode);
4553 else
4554 new_src = target;
4556 if (can_invert)
4557 temp1 = ~temp1;
4558 else if (can_negate)
4559 temp1 = -temp1;
4561 temp1 = trunc_int_for_mode (temp1, mode);
4562 temp1_rtx = GEN_INT (temp1);
4564 if (code == SET)
4566 else if (code == MINUS)
4567 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4568 else
4569 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4571 emit_constant_insn (cond,
4572 gen_rtx_SET (VOIDmode, new_src,
4573 temp1_rtx));
4574 source = new_src;
4576 if (code == SET)
4578 can_negate = can_invert;
4579 can_invert = 0;
4580 code = PLUS;
4582 else if (code == MINUS)
4583 code = PLUS;
4587 if (final_invert)
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4591 gen_rtx_NOT (mode, source)));
4592 insns++;
4595 return insns;
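/* End-to-end illustration (ARM mode, worked by hand): an IOR with
   0x00ff00ff has no single-insn form and no cheaper negated or inverted
   sequence, so the positive sequence from optimal_immediate_sequence is
   emitted, roughly

	orr	rD, rS, #0x00ff0000
	orr	rD, rD, #0x000000ff

   i.e. two instructions.  */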
4598 /* Canonicalize a comparison so that we are more likely to recognize it.
4599 This can be done for a few constant compares, where we can make the
4600 immediate value easier to load. */
4602 static void
4603 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4604 bool op0_preserve_value)
4606 machine_mode mode;
4607 unsigned HOST_WIDE_INT i, maxval;
4609 mode = GET_MODE (*op0);
4610 if (mode == VOIDmode)
4611 mode = GET_MODE (*op1);
4613 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4615 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4616 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4617 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4618 for GTU/LEU in Thumb mode. */
4619 if (mode == DImode)
4622 if (*code == GT || *code == LE
4623 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4625 /* Missing comparison. First try to use an available
4626 comparison. */
4627 if (CONST_INT_P (*op1))
4629 i = INTVAL (*op1);
4630 switch (*code)
4632 case GT:
4633 case LE:
4634 if (i != maxval
4635 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4637 *op1 = GEN_INT (i + 1);
4638 *code = *code == GT ? GE : LT;
4639 return;
4641 break;
4642 case GTU:
4643 case LEU:
4644 if (i != ~((unsigned HOST_WIDE_INT) 0)
4645 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4647 *op1 = GEN_INT (i + 1);
4648 *code = *code == GTU ? GEU : LTU;
4649 return;
4651 break;
4652 default:
4653 gcc_unreachable ();
4657 /* If that did not work, reverse the condition. */
4658 if (!op0_preserve_value)
4660 std::swap (*op0, *op1);
4661 *code = (int)swap_condition ((enum rtx_code)*code);
4664 return;
4667 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4668 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4669 to facilitate possible combining with a cmp into 'ands'. */
4670 if (mode == SImode
4671 && GET_CODE (*op0) == ZERO_EXTEND
4672 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4673 && GET_MODE (XEXP (*op0, 0)) == QImode
4674 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4675 && subreg_lowpart_p (XEXP (*op0, 0))
4676 && *op1 == const0_rtx)
4677 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4678 GEN_INT (255));
4680 /* Comparisons smaller than DImode. Only adjust comparisons against
4681 an out-of-range constant. */
4682 if (!CONST_INT_P (*op1)
4683 || const_ok_for_arm (INTVAL (*op1))
4684 || const_ok_for_arm (- INTVAL (*op1)))
4685 return;
4687 i = INTVAL (*op1);
4689 switch (*code)
4691 case EQ:
4692 case NE:
4693 return;
4695 case GT:
4696 case LE:
4697 if (i != maxval
4698 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4700 *op1 = GEN_INT (i + 1);
4701 *code = *code == GT ? GE : LT;
4702 return;
4704 break;
4706 case GE:
4707 case LT:
4708 if (i != ~maxval
4709 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4711 *op1 = GEN_INT (i - 1);
4712 *code = *code == GE ? GT : LE;
4713 return;
4715 break;
4717 case GTU:
4718 case LEU:
4719 if (i != ~((unsigned HOST_WIDE_INT) 0)
4720 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4722 *op1 = GEN_INT (i + 1);
4723 *code = *code == GTU ? GEU : LTU;
4724 return;
4726 break;
4728 case GEU:
4729 case LTU:
4730 if (i != 0
4731 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4733 *op1 = GEN_INT (i - 1);
4734 *code = *code == GEU ? GTU : LEU;
4735 return;
4737 break;
4739 default:
4740 gcc_unreachable ();
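/* Example of the adjustment above: "x > 511" is rewritten as "x >= 512",
   since 511 (0x1ff) is not a valid immediate for CMP/CMN while 512 is,
   so the comparison can then be emitted directly.  */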
4745 /* Define how to find the value returned by a function. */
4747 static rtx
4748 arm_function_value(const_tree type, const_tree func,
4749 bool outgoing ATTRIBUTE_UNUSED)
4751 machine_mode mode;
4752 int unsignedp ATTRIBUTE_UNUSED;
4753 rtx r ATTRIBUTE_UNUSED;
4755 mode = TYPE_MODE (type);
4757 if (TARGET_AAPCS_BASED)
4758 return aapcs_allocate_return_reg (mode, type, func);
4760 /* Promote integer types. */
4761 if (INTEGRAL_TYPE_P (type))
4762 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4764 /* Promote small structs returned in a register to full-word size
4765 for big-endian AAPCS. */
4766 if (arm_return_in_msb (type))
4768 HOST_WIDE_INT size = int_size_in_bytes (type);
4769 if (size % UNITS_PER_WORD != 0)
4771 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4772 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4776 return arm_libcall_value_1 (mode);
4779 /* libcall hashtable helpers. */
4781 struct libcall_hasher : typed_noop_remove <rtx_def>
4783 typedef rtx_def value_type;
4784 typedef rtx_def compare_type;
4785 static inline hashval_t hash (const value_type *);
4786 static inline bool equal (const value_type *, const compare_type *);
4787 static inline void remove (value_type *);
4790 inline bool
4791 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4793 return rtx_equal_p (p1, p2);
4796 inline hashval_t
4797 libcall_hasher::hash (const value_type *p1)
4799 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4802 typedef hash_table<libcall_hasher> libcall_table_type;
4804 static void
4805 add_libcall (libcall_table_type *htab, rtx libcall)
4807 *htab->find_slot (libcall, INSERT) = libcall;
4810 static bool
4811 arm_libcall_uses_aapcs_base (const_rtx libcall)
4813 static bool init_done = false;
4814 static libcall_table_type *libcall_htab = NULL;
4816 if (!init_done)
4818 init_done = true;
4820 libcall_htab = new libcall_table_type (31);
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4853 add_libcall (libcall_htab,
4854 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4856 /* Values from double-precision helper functions are returned in core
4857 registers if the selected core only supports single-precision
4858 arithmetic, even if we are using the hard-float ABI. The same is
4859 true for single-precision helpers, but we will never be using the
4860 hard-float ABI on a CPU which doesn't support single-precision
4861 operations in hardware. */
4862 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4872 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4873 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4874 SFmode));
4875 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4876 DFmode));
4879 return libcall && libcall_htab->find (libcall) != NULL;
4882 static rtx
4883 arm_libcall_value_1 (machine_mode mode)
4885 if (TARGET_AAPCS_BASED)
4886 return aapcs_libcall_value (mode);
4887 else if (TARGET_IWMMXT_ABI
4888 && arm_vector_mode_supported_p (mode))
4889 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4890 else
4891 return gen_rtx_REG (mode, ARG_REGISTER (1));
4894 /* Define how to find the value returned by a library function
4895 assuming the value has mode MODE. */
4897 static rtx
4898 arm_libcall_value (machine_mode mode, const_rtx libcall)
4900 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4901 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4903 /* The following libcalls return their result in integer registers,
4904 even though they return a floating point value. */
4905 if (arm_libcall_uses_aapcs_base (libcall))
4906 return gen_rtx_REG (mode, ARG_REGISTER(1));
4910 return arm_libcall_value_1 (mode);
4913 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4915 static bool
4916 arm_function_value_regno_p (const unsigned int regno)
4918 if (regno == ARG_REGISTER (1)
4919 || (TARGET_32BIT
4920 && TARGET_AAPCS_BASED
4921 && TARGET_VFP
4922 && TARGET_HARD_FLOAT
4923 && regno == FIRST_VFP_REGNUM)
4924 || (TARGET_IWMMXT_ABI
4925 && regno == FIRST_IWMMXT_REGNUM))
4926 return true;
4928 return false;
4931 /* Determine the amount of memory needed to store the possible return
4932 registers of an untyped call. */
4934 arm_apply_result_size (void)
4936 int size = 16;
4938 if (TARGET_32BIT)
4940 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4941 size += 32;
4942 if (TARGET_IWMMXT_ABI)
4943 size += 8;
4946 return size;
4949 /* Decide whether TYPE should be returned in memory (true)
4950 or in a register (false). FNTYPE is the type of the function making
4951 the call. */
4952 static bool
4953 arm_return_in_memory (const_tree type, const_tree fntype)
4955 HOST_WIDE_INT size;
4957 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4959 if (TARGET_AAPCS_BASED)
4961 /* Simple, non-aggregate types (i.e. not including vectors and
4962 complex) are always returned in a register (or registers).
4963 We don't care about which register here, so we can short-cut
4964 some of the detail. */
4965 if (!AGGREGATE_TYPE_P (type)
4966 && TREE_CODE (type) != VECTOR_TYPE
4967 && TREE_CODE (type) != COMPLEX_TYPE)
4968 return false;
4970 /* Any return value that is no larger than one word can be
4971 returned in r0. */
4972 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4973 return false;
4975 /* Check any available co-processors to see if they accept the
4976 type as a register candidate (VFP, for example, can return
4977 some aggregates in consecutive registers). These aren't
4978 available if the call is variadic. */
4979 if (aapcs_select_return_coproc (type, fntype) >= 0)
4980 return false;
4982 /* Vector values should be returned using ARM registers, not
4983 memory (unless they're over 16 bytes, which will break since
4984 we only have four call-clobbered registers to play with). */
4985 if (TREE_CODE (type) == VECTOR_TYPE)
4986 return (size < 0 || size > (4 * UNITS_PER_WORD));
4988 /* The rest go in memory. */
4989 return true;
4992 if (TREE_CODE (type) == VECTOR_TYPE)
4993 return (size < 0 || size > (4 * UNITS_PER_WORD));
4995 if (!AGGREGATE_TYPE_P (type) &&
4996 (TREE_CODE (type) != VECTOR_TYPE))
4997 /* All simple types are returned in registers. */
4998 return false;
5000 if (arm_abi != ARM_ABI_APCS)
5002 /* ATPCS and later return aggregate types in memory only if they are
5003 larger than a word (or are variable size). */
5004 return (size < 0 || size > UNITS_PER_WORD);
5007 /* For the arm-wince targets we choose to be compatible with Microsoft's
5008 ARM and Thumb compilers, which always return aggregates in memory. */
5009 #ifndef ARM_WINCE
5010 /* All structures/unions bigger than one word are returned in memory.
5011 Also catch the case where int_size_in_bytes returns -1. In this case
5012 the aggregate is either huge or of variable size, and in either case
5013 we will want to return it via memory and not in a register. */
5014 if (size < 0 || size > UNITS_PER_WORD)
5015 return true;
5017 if (TREE_CODE (type) == RECORD_TYPE)
5019 tree field;
5021 /* For a struct the APCS says that we only return in a register
5022 if the type is 'integer like' and every addressable element
5023 has an offset of zero. For practical purposes this means
5024 that the structure can have at most one non bit-field element
5025 and that this element must be the first one in the structure. */
5027 /* Find the first field, ignoring non FIELD_DECL things which will
5028 have been created by C++. */
5029 for (field = TYPE_FIELDS (type);
5030 field && TREE_CODE (field) != FIELD_DECL;
5031 field = DECL_CHAIN (field))
5032 continue;
5034 if (field == NULL)
5035 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5037 /* Check that the first field is valid for returning in a register. */
5039 /* ... Floats are not allowed */
5040 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5041 return true;
5043 /* ... Aggregates that are not themselves valid for returning in
5044 a register are not allowed. */
5045 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5046 return true;
5048 /* Now check the remaining fields, if any. Only bitfields are allowed,
5049 since they are not addressable. */
5050 for (field = DECL_CHAIN (field);
5051 field;
5052 field = DECL_CHAIN (field))
5054 if (TREE_CODE (field) != FIELD_DECL)
5055 continue;
5057 if (!DECL_BIT_FIELD_TYPE (field))
5058 return true;
5061 return false;
5064 if (TREE_CODE (type) == UNION_TYPE)
5066 tree field;
5068 /* Unions can be returned in registers if every element is
5069 integral, or can be returned in an integer register. */
5070 for (field = TYPE_FIELDS (type);
5071 field;
5072 field = DECL_CHAIN (field))
5074 if (TREE_CODE (field) != FIELD_DECL)
5075 continue;
5077 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5078 return true;
5080 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5081 return true;
5084 return false;
5086 #endif /* not ARM_WINCE */
5088 /* Return all other types in memory. */
5089 return true;
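/* Illustration of the old APCS rules above: "struct { int x; }" is
   integer-like and is returned in r0, while "struct { float f; }" or
   anything larger than a word (e.g. "struct { int a, b; }") is returned
   in memory through the hidden result pointer.  */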
5092 const struct pcs_attribute_arg
5094 const char *arg;
5095 enum arm_pcs value;
5096 } pcs_attribute_args[] =
5098 {"aapcs", ARM_PCS_AAPCS},
5099 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5100 #if 0
5101 /* We could recognize these, but changes would be needed elsewhere
5102 * to implement them. */
5103 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5104 {"atpcs", ARM_PCS_ATPCS},
5105 {"apcs", ARM_PCS_APCS},
5106 #endif
5107 {NULL, ARM_PCS_UNKNOWN}
5110 static enum arm_pcs
5111 arm_pcs_from_attribute (tree attr)
5113 const struct pcs_attribute_arg *ptr;
5114 const char *arg;
5116 /* Get the value of the argument. */
5117 if (TREE_VALUE (attr) == NULL_TREE
5118 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5119 return ARM_PCS_UNKNOWN;
5121 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5123 /* Check it against the list of known arguments. */
5124 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5125 if (streq (arg, ptr->arg))
5126 return ptr->value;
5128 /* An unrecognized PCS variant. */
5129 return ARM_PCS_UNKNOWN;
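/* The attribute parsed here is the documented ARM "pcs" function
   attribute, e.g.:

     double f (double) __attribute__((pcs("aapcs-vfp")));

   which requests the VFP variant of the AAPCS for that function.  */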
5132 /* Get the PCS variant to use for this call. TYPE is the function's type
5133 specification, DECL is the specific declaration. DECL may be null if
5134 the call could be indirect or if this is a library call. */
5135 static enum arm_pcs
5136 arm_get_pcs_model (const_tree type, const_tree decl)
5138 bool user_convention = false;
5139 enum arm_pcs user_pcs = arm_pcs_default;
5140 tree attr;
5142 gcc_assert (type);
5144 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5145 if (attr)
5147 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5148 user_convention = true;
5151 if (TARGET_AAPCS_BASED)
5153 /* Detect varargs functions. These always use the base rules
5154 (no argument is ever a candidate for a co-processor
5155 register). */
5156 bool base_rules = stdarg_p (type);
5158 if (user_convention)
5160 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5161 sorry ("non-AAPCS derived PCS variant");
5162 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5163 error ("variadic functions must use the base AAPCS variant");
5166 if (base_rules)
5167 return ARM_PCS_AAPCS;
5168 else if (user_convention)
5169 return user_pcs;
5170 else if (decl && flag_unit_at_a_time)
5172 /* Local functions never leak outside this compilation unit,
5173 so we are free to use whatever conventions are
5174 appropriate. */
5175 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5176 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5177 if (i && i->local)
5178 return ARM_PCS_AAPCS_LOCAL;
5181 else if (user_convention && user_pcs != arm_pcs_default)
5182 sorry ("PCS variant");
5184 /* For everything else we use the target's default. */
5185 return arm_pcs_default;
5189 static void
5190 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5191 const_tree fntype ATTRIBUTE_UNUSED,
5192 rtx libcall ATTRIBUTE_UNUSED,
5193 const_tree fndecl ATTRIBUTE_UNUSED)
5195 /* Record the unallocated VFP registers. */
5196 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5197 pcum->aapcs_vfp_reg_alloc = 0;
5200 /* Walk down the type tree of TYPE counting consecutive base elements.
5201 If *MODEP is VOIDmode, then set it to the first valid floating point
5202 type. If a non-floating point type is found, or if a floating point
5203 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5204 otherwise return the count in the sub-tree. */
5205 static int
5206 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5208 machine_mode mode;
5209 HOST_WIDE_INT size;
5211 switch (TREE_CODE (type))
5213 case REAL_TYPE:
5214 mode = TYPE_MODE (type);
5215 if (mode != DFmode && mode != SFmode)
5216 return -1;
5218 if (*modep == VOIDmode)
5219 *modep = mode;
5221 if (*modep == mode)
5222 return 1;
5224 break;
5226 case COMPLEX_TYPE:
5227 mode = TYPE_MODE (TREE_TYPE (type));
5228 if (mode != DFmode && mode != SFmode)
5229 return -1;
5231 if (*modep == VOIDmode)
5232 *modep = mode;
5234 if (*modep == mode)
5235 return 2;
5237 break;
5239 case VECTOR_TYPE:
5240 /* Use V2SImode and V4SImode as representatives of all 64-bit
5241 and 128-bit vector types, whether or not those modes are
5242 supported with the present options. */
5243 size = int_size_in_bytes (type);
5244 switch (size)
5246 case 8:
5247 mode = V2SImode;
5248 break;
5249 case 16:
5250 mode = V4SImode;
5251 break;
5252 default:
5253 return -1;
5256 if (*modep == VOIDmode)
5257 *modep = mode;
5259 /* Vector modes are considered to be opaque: two vectors are
5260 equivalent for the purposes of being homogeneous aggregates
5261 if they are the same size. */
5262 if (*modep == mode)
5263 return 1;
5265 break;
5267 case ARRAY_TYPE:
5269 int count;
5270 tree index = TYPE_DOMAIN (type);
5272 /* Can't handle incomplete types nor sizes that are not
5273 fixed. */
5274 if (!COMPLETE_TYPE_P (type)
5275 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5276 return -1;
5278 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5279 if (count == -1
5280 || !index
5281 || !TYPE_MAX_VALUE (index)
5282 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5283 || !TYPE_MIN_VALUE (index)
5284 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5285 || count < 0)
5286 return -1;
5288 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5289 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5291 /* There must be no padding. */
5292 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5293 return -1;
5295 return count;
5298 case RECORD_TYPE:
5300 int count = 0;
5301 int sub_count;
5302 tree field;
5304 /* Can't handle incomplete types nor sizes that are not
5305 fixed. */
5306 if (!COMPLETE_TYPE_P (type)
5307 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5308 return -1;
5310 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5312 if (TREE_CODE (field) != FIELD_DECL)
5313 continue;
5315 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5316 if (sub_count < 0)
5317 return -1;
5318 count += sub_count;
5321 /* There must be no padding. */
5322 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5323 return -1;
5325 return count;
5328 case UNION_TYPE:
5329 case QUAL_UNION_TYPE:
5331 /* These aren't very interesting except in a degenerate case. */
5332 int count = 0;
5333 int sub_count;
5334 tree field;
5336 /* Can't handle incomplete types nor sizes that are not
5337 fixed. */
5338 if (!COMPLETE_TYPE_P (type)
5339 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5340 return -1;
5342 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5344 if (TREE_CODE (field) != FIELD_DECL)
5345 continue;
5347 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5348 if (sub_count < 0)
5349 return -1;
5350 count = count > sub_count ? count : sub_count;
5353 /* There must be no padding. */
5354 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5355 return -1;
5357 return count;
5360 default:
5361 break;
5364 return -1;
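/* Examples of the homogeneous-aggregate walk above:
     struct { double x, y, z; }    -> count 3, *modep == DFmode
     _Complex double               -> count 2, *modep == DFmode
     struct { float a; double b; } -> -1 (mixed base types)
   Only the successful cases are candidates for VFP argument and return
   registers under the AAPCS VFP variant.  */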
5367 /* Return true if PCS_VARIANT should use VFP registers. */
5368 static bool
5369 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5371 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5373 static bool seen_thumb1_vfp = false;
5375 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5377 sorry ("Thumb-1 hard-float VFP ABI");
5378 /* sorry() is not immediately fatal, so only display this once. */
5379 seen_thumb1_vfp = true;
5382 return true;
5385 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5386 return false;
5388 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5389 (TARGET_VFP_DOUBLE || !is_double));
5392 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5393 suitable for passing or returning in VFP registers for the PCS
5394 variant selected. If it is, then *BASE_MODE is updated to contain
5395 a machine mode describing each element of the argument's type and
5396 *COUNT to hold the number of such elements. */
5397 static bool
5398 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5399 machine_mode mode, const_tree type,
5400 machine_mode *base_mode, int *count)
5402 machine_mode new_mode = VOIDmode;
5404 /* If we have the type information, prefer that to working things
5405 out from the mode. */
5406 if (type)
5408 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5410 if (ag_count > 0 && ag_count <= 4)
5411 *count = ag_count;
5412 else
5413 return false;
5415 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5416 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5417 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5419 *count = 1;
5420 new_mode = mode;
5422 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5424 *count = 2;
5425 new_mode = (mode == DCmode ? DFmode : SFmode);
5427 else
5428 return false;
5431 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5432 return false;
5434 *base_mode = new_mode;
5435 return true;
5438 static bool
5439 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5440 machine_mode mode, const_tree type)
5442 int count ATTRIBUTE_UNUSED;
5443 machine_mode ag_mode ATTRIBUTE_UNUSED;
5445 if (!use_vfp_abi (pcs_variant, false))
5446 return false;
5447 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5448 &ag_mode, &count);
5451 static bool
5452 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5453 const_tree type)
5455 if (!use_vfp_abi (pcum->pcs_variant, false))
5456 return false;
5458 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5459 &pcum->aapcs_vfp_rmode,
5460 &pcum->aapcs_vfp_rcount);
5463 static bool
5464 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5465 const_tree type ATTRIBUTE_UNUSED)
5467 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5468 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5469 int regno;
5471 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5472 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5474 pcum->aapcs_vfp_reg_alloc = mask << regno;
5475 if (mode == BLKmode
5476 || (mode == TImode && ! TARGET_NEON)
5477 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5479 int i;
5480 int rcount = pcum->aapcs_vfp_rcount;
5481 int rshift = shift;
5482 machine_mode rmode = pcum->aapcs_vfp_rmode;
5483 rtx par;
5484 if (!TARGET_NEON)
5486 /* Avoid using unsupported vector modes. */
5487 if (rmode == V2SImode)
5488 rmode = DImode;
5489 else if (rmode == V4SImode)
5491 rmode = DImode;
5492 rcount *= 2;
5493 rshift /= 2;
5496 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5497 for (i = 0; i < rcount; i++)
5499 rtx tmp = gen_rtx_REG (rmode,
5500 FIRST_VFP_REGNUM + regno + i * rshift);
5501 tmp = gen_rtx_EXPR_LIST
5502 (VOIDmode, tmp,
5503 GEN_INT (i * GET_MODE_SIZE (rmode)));
5504 XVECEXP (par, 0, i) = tmp;
5507 pcum->aapcs_reg = par;
5509 else
5510 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5511 return true;
5513 return false;
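/* Rough sketch of the allocation above for a candidate of three doubles:
   aapcs_vfp_rmode == DFmode gives shift == 2 and a six-bit mask, so we
   scan for six consecutive free single-precision registers starting at
   an even regno, e.g. s0-s5 (d0-d2), and claim them.  */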
5516 static rtx
5517 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5518 machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 if (!use_vfp_abi (pcs_variant, false))
5522 return NULL;
5524 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5526 int count;
5527 machine_mode ag_mode;
5528 int i;
5529 rtx par;
5530 int shift;
5532 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5533 &ag_mode, &count);
5535 if (!TARGET_NEON)
5537 if (ag_mode == V2SImode)
5538 ag_mode = DImode;
5539 else if (ag_mode == V4SImode)
5541 ag_mode = DImode;
5542 count *= 2;
5545 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5546 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5547 for (i = 0; i < count; i++)
5549 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5550 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5551 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5552 XVECEXP (par, 0, i) = tmp;
5555 return par;
5558 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5561 static void
5562 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5563 machine_mode mode ATTRIBUTE_UNUSED,
5564 const_tree type ATTRIBUTE_UNUSED)
5566 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5567 pcum->aapcs_vfp_reg_alloc = 0;
5568 return;
5571 #define AAPCS_CP(X) \
5573 aapcs_ ## X ## _cum_init, \
5574 aapcs_ ## X ## _is_call_candidate, \
5575 aapcs_ ## X ## _allocate, \
5576 aapcs_ ## X ## _is_return_candidate, \
5577 aapcs_ ## X ## _allocate_return_reg, \
5578 aapcs_ ## X ## _advance \
5581 /* Table of co-processors that can be used to pass arguments in
5582 registers. Ideally no argument should be a candidate for more than
5583 one co-processor table entry, but the table is processed in order
5584 and stops after the first match. If that entry then fails to put
5585 the argument into a co-processor register, the argument will go on
5586 the stack. */
5587 static struct
5589 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5590 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5592 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5593 BLKmode) is a candidate for this co-processor's registers; this
5594 function should ignore any position-dependent state in
5595 CUMULATIVE_ARGS and only use call-type dependent information. */
5596 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5598 /* Return true if the argument does get a co-processor register; it
5599 should set aapcs_reg to an RTX of the register allocated as is
5600 required for a return from FUNCTION_ARG. */
5601 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5603 /* Return true if a result of mode MODE (or type TYPE if MODE is
5604 BLKmode) can be returned in this co-processor's registers. */
5605 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5607 /* Allocate and return an RTX element to hold the return type of a
5608 call; this routine must not fail and will only be called if
5609 is_return_candidate returned true with the same parameters. */
5610 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5612 /* Finish processing this argument and prepare to start processing
5613 the next one. */
5614 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5615 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5617 AAPCS_CP(vfp)
5620 #undef AAPCS_CP
5622 static int
5623 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5624 const_tree type)
5626 int i;
5628 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5629 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5630 return i;
5632 return -1;
5635 static int
5636 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5638 /* We aren't passed a decl, so we can't check that a call is local.
5639 However, it isn't clear that that would be a win anyway, since it
5640 might limit some tail-calling opportunities. */
5641 enum arm_pcs pcs_variant;
5643 if (fntype)
5645 const_tree fndecl = NULL_TREE;
5647 if (TREE_CODE (fntype) == FUNCTION_DECL)
5649 fndecl = fntype;
5650 fntype = TREE_TYPE (fntype);
5653 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5655 else
5656 pcs_variant = arm_pcs_default;
5658 if (pcs_variant != ARM_PCS_AAPCS)
5660 int i;
5662 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5663 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5664 TYPE_MODE (type),
5665 type))
5666 return i;
5668 return -1;
5671 static rtx
5672 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5673 const_tree fntype)
5675 /* We aren't passed a decl, so we can't check that a call is local.
5676 However, it isn't clear that that would be a win anyway, since it
5677 might limit some tail-calling opportunities. */
5678 enum arm_pcs pcs_variant;
5679 int unsignedp ATTRIBUTE_UNUSED;
5681 if (fntype)
5683 const_tree fndecl = NULL_TREE;
5685 if (TREE_CODE (fntype) == FUNCTION_DECL)
5687 fndecl = fntype;
5688 fntype = TREE_TYPE (fntype);
5691 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5693 else
5694 pcs_variant = arm_pcs_default;
5696 /* Promote integer types. */
5697 if (type && INTEGRAL_TYPE_P (type))
5698 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5700 if (pcs_variant != ARM_PCS_AAPCS)
5702 int i;
5704 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5705 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5706 type))
5707 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5708 mode, type);
5711 /* Promotes small structs returned in a register to full-word size
5712 for big-endian AAPCS. */
5713 if (type && arm_return_in_msb (type))
5715 HOST_WIDE_INT size = int_size_in_bytes (type);
5716 if (size % UNITS_PER_WORD != 0)
5718 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5719 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5723 return gen_rtx_REG (mode, R0_REGNUM);
5726 static rtx
5727 aapcs_libcall_value (machine_mode mode)
5729 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5730 && GET_MODE_SIZE (mode) <= 4)
5731 mode = SImode;
5733 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5736 /* Lay out a function argument using the AAPCS rules. The rule
5737 numbers referred to here are those in the AAPCS. */
5738 static void
5739 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5740 const_tree type, bool named)
5742 int nregs, nregs2;
5743 int ncrn;
5745 /* We only need to do this once per argument. */
5746 if (pcum->aapcs_arg_processed)
5747 return;
5749 pcum->aapcs_arg_processed = true;
5751 /* Special case: if named is false then we are handling an incoming
5752 anonymous argument which is on the stack. */
5753 if (!named)
5754 return;
5756 /* Is this a potential co-processor register candidate? */
5757 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5759 int slot = aapcs_select_call_coproc (pcum, mode, type);
5760 pcum->aapcs_cprc_slot = slot;
5762 /* We don't have to apply any of the rules from part B of the
5763 preparation phase, these are handled elsewhere in the
5764 compiler. */
5766 if (slot >= 0)
5768 /* A Co-processor register candidate goes either in its own
5769 class of registers or on the stack. */
5770 if (!pcum->aapcs_cprc_failed[slot])
5772 /* C1.cp - Try to allocate the argument to co-processor
5773 registers. */
5774 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5775 return;
5777 /* C2.cp - Put the argument on the stack and note that we
5778 can't assign any more candidates in this slot. We also
5779 need to note that we have allocated stack space, so that
5780 we won't later try to split a non-cprc candidate between
5781 core registers and the stack. */
5782 pcum->aapcs_cprc_failed[slot] = true;
5783 pcum->can_split = false;
5786 /* We didn't get a register, so this argument goes on the
5787 stack. */
5788 gcc_assert (pcum->can_split == false);
5789 return;
5793 /* C3 - For double-word aligned arguments, round the NCRN up to the
5794 next even number. */
5795 ncrn = pcum->aapcs_ncrn;
5796 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5797 ncrn++;
5799 nregs = ARM_NUM_REGS2(mode, type);
5801 /* Sigh, this test should really assert that nregs > 0, but a GCC
5802 extension allows empty structs and then gives them empty size; it
5803 then allows such a structure to be passed by value. For some of
5804 the code below we have to pretend that such an argument has
5805 non-zero size so that we 'locate' it correctly either in
5806 registers or on the stack. */
5807 gcc_assert (nregs >= 0);
5809 nregs2 = nregs ? nregs : 1;
5811 /* C4 - Argument fits entirely in core registers. */
5812 if (ncrn + nregs2 <= NUM_ARG_REGS)
5814 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5815 pcum->aapcs_next_ncrn = ncrn + nregs;
5816 return;
5819 /* C5 - Some core registers left and there are no arguments already
5820 on the stack: split this argument between the remaining core
5821 registers and the stack. */
5822 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5824 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5825 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5826 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5827 return;
5830 /* C6 - NCRN is set to 4. */
5831 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5833 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5834 return;
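/* A worked example of the rules above for the base AAPCS (no co-processor
   candidate): for a call such as f (int a, double b), A is allocated to r0
   and NCRN becomes 1; B requires doubleword alignment, so C3 rounds NCRN up
   to 2 and C4 places B in r2-r3.  Any further argument then finds NCRN == 4
   and goes on the stack (C6-C8).  */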
5837 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5838 for a call to a function whose data type is FNTYPE.
5839 For a library call, FNTYPE is NULL. */
5840 void
5841 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5842 rtx libname,
5843 tree fndecl ATTRIBUTE_UNUSED)
5845 /* Long call handling. */
5846 if (fntype)
5847 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5848 else
5849 pcum->pcs_variant = arm_pcs_default;
5851 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5853 if (arm_libcall_uses_aapcs_base (libname))
5854 pcum->pcs_variant = ARM_PCS_AAPCS;
5856 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5857 pcum->aapcs_reg = NULL_RTX;
5858 pcum->aapcs_partial = 0;
5859 pcum->aapcs_arg_processed = false;
5860 pcum->aapcs_cprc_slot = -1;
5861 pcum->can_split = true;
5863 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5865 int i;
5867 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5869 pcum->aapcs_cprc_failed[i] = false;
5870 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5873 return;
5876 /* Legacy ABIs */
5878 /* On the ARM, the offset starts at 0. */
5879 pcum->nregs = 0;
5880 pcum->iwmmxt_nregs = 0;
5881 pcum->can_split = true;
5883 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named'. */
5885 pcum->named_count = 0;
5886 pcum->nargs = 0;
5888 if (TARGET_REALLY_IWMMXT && fntype)
5890 tree fn_arg;
5892 for (fn_arg = TYPE_ARG_TYPES (fntype);
5893 fn_arg;
5894 fn_arg = TREE_CHAIN (fn_arg))
5895 pcum->named_count += 1;
5897 if (! pcum->named_count)
5898 pcum->named_count = INT_MAX;
5902 /* Return true if we use LRA instead of reload pass. */
5903 static bool
5904 arm_lra_p (void)
5906 return arm_lra_flag;
5909 /* Return true if mode/type need doubleword alignment. */
5910 static bool
5911 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5913 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5914 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
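/* For example, on AAPCS targets PARM_BOUNDARY is 32, so DImode and DFmode
   values (and any type containing a 64-bit aligned member) report greater
   alignment here and are then placed in an even-numbered register pair or
   at an 8-byte aligned stack slot by the callers of this predicate.  */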
5918 /* Determine where to put an argument to a function.
5919 Value is zero to push the argument on the stack,
5920 or a hard register in which to store the argument.
5922 MODE is the argument's machine mode.
5923 TYPE is the data type of the argument (as a tree).
5924 This is null for libcalls where that information may
5925 not be available.
5926 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5927 the preceding args and about the function being called.
5928 NAMED is nonzero if this argument is a named parameter
5929 (otherwise it is an extra parameter matching an ellipsis).
5931 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5932 other arguments are passed on the stack. If (NAMED == 0) (which happens
5933 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5934 defined), say it is passed on the stack (function_prologue will
5935 indeed make it be passed on the stack if necessary). */
5937 static rtx
5938 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5939 const_tree type, bool named)
5941 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5942 int nregs;
5944 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5945 a call insn (op3 of a call_value insn). */
5946 if (mode == VOIDmode)
5947 return const0_rtx;
5949 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5951 aapcs_layout_arg (pcum, mode, type, named);
5952 return pcum->aapcs_reg;
5955 /* Varargs vectors are treated the same as long long.
5956 named_count avoids having to change the way arm handles 'named'. */
5957 if (TARGET_IWMMXT_ABI
5958 && arm_vector_mode_supported_p (mode)
5959 && pcum->named_count > pcum->nargs + 1)
5961 if (pcum->iwmmxt_nregs <= 9)
5962 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5963 else
5965 pcum->can_split = false;
5966 return NULL_RTX;
5970 /* Put doubleword aligned quantities in even register pairs. */
5971 if (pcum->nregs & 1
5972 && ARM_DOUBLEWORD_ALIGN
5973 && arm_needs_doubleword_align (mode, type))
5974 pcum->nregs++;
5976 /* Only allow splitting an arg between regs and memory if all preceding
5977 args were allocated to regs. For args passed by reference we only count
5978 the reference pointer. */
5979 if (pcum->can_split)
5980 nregs = 1;
5981 else
5982 nregs = ARM_NUM_REGS2 (mode, type);
5984 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5985 return NULL_RTX;
5987 return gen_rtx_REG (mode, pcum->nregs);
5990 static unsigned int
5991 arm_function_arg_boundary (machine_mode mode, const_tree type)
5993 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5994 ? DOUBLEWORD_ALIGNMENT
5995 : PARM_BOUNDARY);
5998 static int
5999 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6000 tree type, bool named)
6002 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6003 int nregs = pcum->nregs;
6005 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6007 aapcs_layout_arg (pcum, mode, type, named);
6008 return pcum->aapcs_partial;
6011 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6012 return 0;
6014 if (NUM_ARG_REGS > nregs
6015 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6016 && pcum->can_split)
6017 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6019 return 0;
6022 /* Update the data in PCUM to advance over an argument
6023 of mode MODE and data type TYPE.
6024 (TYPE is null for libcalls where that information may not be available.) */
6026 static void
6027 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6028 const_tree type, bool named)
6030 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6032 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6034 aapcs_layout_arg (pcum, mode, type, named);
6036 if (pcum->aapcs_cprc_slot >= 0)
6038 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6039 type);
6040 pcum->aapcs_cprc_slot = -1;
6043 /* Generic stuff. */
6044 pcum->aapcs_arg_processed = false;
6045 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6046 pcum->aapcs_reg = NULL_RTX;
6047 pcum->aapcs_partial = 0;
6049 else
6051 pcum->nargs += 1;
6052 if (arm_vector_mode_supported_p (mode)
6053 && pcum->named_count > pcum->nargs
6054 && TARGET_IWMMXT_ABI)
6055 pcum->iwmmxt_nregs += 1;
6056 else
6057 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6061 /* Variable sized types are passed by reference. This is a GCC
6062 extension to the ARM ABI. */
6064 static bool
6065 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6066 machine_mode mode ATTRIBUTE_UNUSED,
6067 const_tree type, bool named ATTRIBUTE_UNUSED)
6069 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
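/* TYPE_SIZE is not an INTEGER_CST for variable-sized types, for example a
   C99 variable-length array type (or, as a GNU extension, a structure
   containing one); objects of such types are passed by invisible reference
   rather than copied onto the stack.  */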
6072 /* Encode the current state of the #pragma [no_]long_calls. */
6073 typedef enum
6075 OFF, /* No #pragma [no_]long_calls is in effect. */
6076 LONG, /* #pragma long_calls is in effect. */
6077 SHORT /* #pragma no_long_calls is in effect. */
6078 } arm_pragma_enum;
6080 static arm_pragma_enum arm_pragma_long_calls = OFF;
6082 void
6083 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6085 arm_pragma_long_calls = LONG;
6088 void
6089 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6091 arm_pragma_long_calls = SHORT;
6094 void
6095 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6097 arm_pragma_long_calls = OFF;
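/* These handlers implement the ARM-specific pragmas.  Typical usage:

       #pragma long_calls
       void far_away (void);
       #pragma long_calls_off

   Declarations between the pragmas implicitly receive the long_call
   attribute, attached by arm_set_default_type_attributes below.  */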
6100 /* Handle an attribute requiring a FUNCTION_DECL;
6101 arguments as in struct attribute_spec.handler. */
6102 static tree
6103 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6104 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6106 if (TREE_CODE (*node) != FUNCTION_DECL)
6108 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6109 name);
6110 *no_add_attrs = true;
6113 return NULL_TREE;
6116 /* Handle an "interrupt" or "isr" attribute;
6117 arguments as in struct attribute_spec.handler. */
6118 static tree
6119 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6120 bool *no_add_attrs)
6122 if (DECL_P (*node))
6124 if (TREE_CODE (*node) != FUNCTION_DECL)
6126 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6127 name);
6128 *no_add_attrs = true;
6130 /* FIXME: the argument if any is checked for type attributes;
6131 should it be checked for decl ones? */
6133 else
6135 if (TREE_CODE (*node) == FUNCTION_TYPE
6136 || TREE_CODE (*node) == METHOD_TYPE)
6138 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6140 warning (OPT_Wattributes, "%qE attribute ignored",
6141 name);
6142 *no_add_attrs = true;
6145 else if (TREE_CODE (*node) == POINTER_TYPE
6146 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6147 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6148 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6150 *node = build_variant_type_copy (*node);
6151 TREE_TYPE (*node) = build_type_attribute_variant
6152 (TREE_TYPE (*node),
6153 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6154 *no_add_attrs = true;
6156 else
6158 /* Possibly pass this attribute on from the type to a decl. */
6159 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6160 | (int) ATTR_FLAG_FUNCTION_NEXT
6161 | (int) ATTR_FLAG_ARRAY_NEXT))
6163 *no_add_attrs = true;
6164 return tree_cons (name, args, NULL_TREE);
6166 else
6168 warning (OPT_Wattributes, "%qE attribute ignored",
6169 name);
6174 return NULL_TREE;
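/* The "interrupt"/"isr" attribute handled above is typically written as

       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the optional string argument names the exception kind and is
   validated by arm_isr_value.  */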
6177 /* Handle a "pcs" attribute; arguments as in struct
6178 attribute_spec.handler. */
6179 static tree
6180 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6181 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6183 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6185 warning (OPT_Wattributes, "%qE attribute ignored", name);
6186 *no_add_attrs = true;
6188 return NULL_TREE;
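/* The "pcs" attribute selects the procedure call variant for a single
   function, for example

       double dot (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));

   Strings not recognized by arm_pcs_from_attribute are warned about and
   the attribute is dropped.  */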
6191 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6192 /* Handle the "notshared" attribute. This attribute is another way of
6193 requesting hidden visibility. ARM's compiler supports
6194 "__declspec(notshared)"; we support the same thing via an
6195 attribute. */
6197 static tree
6198 arm_handle_notshared_attribute (tree *node,
6199 tree name ATTRIBUTE_UNUSED,
6200 tree args ATTRIBUTE_UNUSED,
6201 int flags ATTRIBUTE_UNUSED,
6202 bool *no_add_attrs)
6204 tree decl = TYPE_NAME (*node);
6206 if (decl)
6208 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6209 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6210 *no_add_attrs = false;
6212 return NULL_TREE;
6214 #endif
6216 /* Return 0 if the attributes for two types are incompatible, 1 if they
6217 are compatible, and 2 if they are nearly compatible (which causes a
6218 warning to be generated). */
6219 static int
6220 arm_comp_type_attributes (const_tree type1, const_tree type2)
6222 int l1, l2, s1, s2;
6224 /* Check for mismatch of non-default calling convention. */
6225 if (TREE_CODE (type1) != FUNCTION_TYPE)
6226 return 1;
6228 /* Check for mismatched call attributes. */
6229 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6230 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6231 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6232 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6234 /* Only bother to check if an attribute is defined. */
6235 if (l1 | l2 | s1 | s2)
6237 /* If one type has an attribute, the other must have the same attribute. */
6238 if ((l1 != l2) || (s1 != s2))
6239 return 0;
6241 /* Disallow mixed attributes. */
6242 if ((l1 & s2) || (l2 & s1))
6243 return 0;
6246 /* Check for mismatched ISR attribute. */
6247 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6248 if (! l1)
6249 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6250 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6251 if (! l2)
6252 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6253 if (l1 != l2)
6254 return 0;
6256 return 1;
6259 /* Assign default attributes to a newly defined type. This is used to
6260 set short_call/long_call attributes for function types of
6261 functions defined inside corresponding #pragma scopes. */
6262 static void
6263 arm_set_default_type_attributes (tree type)
6265 /* Add __attribute__ ((long_call)) to all functions, when
6266 inside #pragma long_calls or __attribute__ ((short_call)),
6267 when inside #pragma no_long_calls. */
6268 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6270 tree type_attr_list, attr_name;
6271 type_attr_list = TYPE_ATTRIBUTES (type);
6273 if (arm_pragma_long_calls == LONG)
6274 attr_name = get_identifier ("long_call");
6275 else if (arm_pragma_long_calls == SHORT)
6276 attr_name = get_identifier ("short_call");
6277 else
6278 return;
6280 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6281 TYPE_ATTRIBUTES (type) = type_attr_list;
6285 /* Return true if DECL is known to be linked into section SECTION. */
6287 static bool
6288 arm_function_in_section_p (tree decl, section *section)
6290 /* We can only be certain about functions defined in the same
6291 compilation unit. */
6292 if (!TREE_STATIC (decl))
6293 return false;
6295 /* Make sure that SYMBOL always binds to the definition in this
6296 compilation unit. */
6297 if (!targetm.binds_local_p (decl))
6298 return false;
6300 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6301 if (!DECL_SECTION_NAME (decl))
6303 /* Make sure that we will not create a unique section for DECL. */
6304 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6305 return false;
6308 return function_section (decl) == section;
6311 /* Return nonzero if a 32-bit "long_call" should be generated for
6312 a call from the current function to DECL. We generate a long_call
6313 if the function:
6315 a. has an __attribute__ ((long_call))
6316 or b. is within the scope of a #pragma long_calls
6317 or c. the -mlong-calls command line switch has been specified
6319 However we do not generate a long call if the function:
6321 d. has an __attribute__ ((short_call))
6322 or e. is inside the scope of a #pragma no_long_calls
6323 or f. is defined in the same section as the current function. */
6325 bool
6326 arm_is_long_call_p (tree decl)
6328 tree attrs;
6330 if (!decl)
6331 return TARGET_LONG_CALLS;
6333 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6334 if (lookup_attribute ("short_call", attrs))
6335 return false;
6337 /* For "f", be conservative, and only cater for cases in which the
6338 whole of the current function is placed in the same section. */
6339 if (!flag_reorder_blocks_and_partition
6340 && TREE_CODE (decl) == FUNCTION_DECL
6341 && arm_function_in_section_p (decl, current_function_section ()))
6342 return false;
6344 if (lookup_attribute ("long_call", attrs))
6345 return true;
6347 return TARGET_LONG_CALLS;
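/* Case (a) above corresponds to a declaration such as

       void far_away (void) __attribute__ ((long_call));

   and case (c) to compiling with -mlong-calls; a short_call attribute or
   an enclosing #pragma no_long_calls takes precedence over both.  */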
6350 /* Return nonzero if it is ok to make a tail-call to DECL. */
6351 static bool
6352 arm_function_ok_for_sibcall (tree decl, tree exp)
6354 unsigned long func_type;
6356 if (cfun->machine->sibcall_blocked)
6357 return false;
6359 /* Never tailcall something if we are generating code for Thumb-1. */
6360 if (TARGET_THUMB1)
6361 return false;
6363 /* The PIC register is live on entry to VxWorks PLT entries, so we
6364 must make the call before restoring the PIC register. */
6365 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6366 return false;
6368 /* If we are interworking and the function is not declared static
6369 then we can't tail-call it unless we know that it exists in this
6370 compilation unit (since it might be a Thumb routine). */
6371 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6372 && !TREE_ASM_WRITTEN (decl))
6373 return false;
6375 func_type = arm_current_func_type ();
6376 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6377 if (IS_INTERRUPT (func_type))
6378 return false;
6380 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6382 /* Check that the return value locations are the same. For
6383 example that we aren't returning a value from the sibling in
6384 a VFP register but then need to transfer it to a core
6385 register. */
6386 rtx a, b;
6388 a = arm_function_value (TREE_TYPE (exp), decl, false);
6389 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6390 cfun->decl, false);
6391 if (!rtx_equal_p (a, b))
6392 return false;
6395 /* Never tailcall if function may be called with a misaligned SP. */
6396 if (IS_STACKALIGN (func_type))
6397 return false;
6399 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6400 references should become a NOP. Don't convert such calls into
6401 sibling calls. */
6402 if (TARGET_AAPCS_BASED
6403 && arm_abi == ARM_ABI_AAPCS
6404 && decl
6405 && DECL_WEAK (decl))
6406 return false;
6408 /* Everything else is ok. */
6409 return true;
6413 /* Addressing mode support functions. */
6415 /* Return nonzero if X is a legitimate immediate operand when compiling
6416 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6418 legitimate_pic_operand_p (rtx x)
6420 if (GET_CODE (x) == SYMBOL_REF
6421 || (GET_CODE (x) == CONST
6422 && GET_CODE (XEXP (x, 0)) == PLUS
6423 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6424 return 0;
6426 return 1;
6429 /* Record that the current function needs a PIC register. Initialize
6430 cfun->machine->pic_reg if we have not already done so. */
6432 static void
6433 require_pic_register (void)
6435 /* A lot of the logic here is made obscure by the fact that this
6436 routine gets called as part of the rtx cost estimation process.
6437 We don't want those calls to affect any assumptions about the real
6438 function; and further, we can't call entry_of_function() until we
6439 start the real expansion process. */
6440 if (!crtl->uses_pic_offset_table)
6442 gcc_assert (can_create_pseudo_p ());
6443 if (arm_pic_register != INVALID_REGNUM
6444 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6446 if (!cfun->machine->pic_reg)
6447 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6449 /* Play games to avoid marking the function as needing pic
6450 if we are being called as part of the cost-estimation
6451 process. */
6452 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6453 crtl->uses_pic_offset_table = 1;
6455 else
6457 rtx_insn *seq, *insn;
6459 if (!cfun->machine->pic_reg)
6460 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6462 /* Play games to avoid marking the function as needing pic
6463 if we are being called as part of the cost-estimation
6464 process. */
6465 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6467 crtl->uses_pic_offset_table = 1;
6468 start_sequence ();
6470 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6471 && arm_pic_register > LAST_LO_REGNUM)
6472 emit_move_insn (cfun->machine->pic_reg,
6473 gen_rtx_REG (Pmode, arm_pic_register));
6474 else
6475 arm_load_pic_register (0UL);
6477 seq = get_insns ();
6478 end_sequence ();
6480 for (insn = seq; insn; insn = NEXT_INSN (insn))
6481 if (INSN_P (insn))
6482 INSN_LOCATION (insn) = prologue_location;
6484 /* We can be called during expansion of PHI nodes, where
6485 we can't yet emit instructions directly in the final
6486 insn stream. Queue the insns on the entry edge, they will
6487 be committed after everything else is expanded. */
6488 insert_insn_on_edge (seq,
6489 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6496 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6498 if (GET_CODE (orig) == SYMBOL_REF
6499 || GET_CODE (orig) == LABEL_REF)
6501 rtx insn;
6503 if (reg == 0)
6505 gcc_assert (can_create_pseudo_p ());
6506 reg = gen_reg_rtx (Pmode);
6509 /* VxWorks does not impose a fixed gap between segments; the run-time
6510 gap can be different from the object-file gap. We therefore can't
6511 use GOTOFF unless we are absolutely sure that the symbol is in the
6512 same segment as the GOT. Unfortunately, the flexibility of linker
6513 scripts means that we can't be sure of that in general, so assume
6514 that GOTOFF is never valid on VxWorks. */
6515 if ((GET_CODE (orig) == LABEL_REF
6516 || (GET_CODE (orig) == SYMBOL_REF &&
6517 SYMBOL_REF_LOCAL_P (orig)))
6518 && NEED_GOT_RELOC
6519 && arm_pic_data_is_text_relative)
6520 insn = arm_pic_static_addr (orig, reg);
6521 else
6523 rtx pat;
6524 rtx mem;
6526 /* If this function doesn't have a pic register, create one now. */
6527 require_pic_register ();
6529 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6531 /* Make the MEM as close to a constant as possible. */
6532 mem = SET_SRC (pat);
6533 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6534 MEM_READONLY_P (mem) = 1;
6535 MEM_NOTRAP_P (mem) = 1;
6537 insn = emit_insn (pat);
6540 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6541 by loop. */
6542 set_unique_reg_note (insn, REG_EQUAL, orig);
6544 return reg;
6546 else if (GET_CODE (orig) == CONST)
6548 rtx base, offset;
6550 if (GET_CODE (XEXP (orig, 0)) == PLUS
6551 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6552 return orig;
6554 /* Handle the case where we have: const (UNSPEC_TLS). */
6555 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6556 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6557 return orig;
6559 /* Handle the case where we have:
6560 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6561 CONST_INT. */
6562 if (GET_CODE (XEXP (orig, 0)) == PLUS
6563 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6564 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6566 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6567 return orig;
6570 if (reg == 0)
6572 gcc_assert (can_create_pseudo_p ());
6573 reg = gen_reg_rtx (Pmode);
6576 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6578 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6579 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6580 base == reg ? 0 : reg);
6582 if (CONST_INT_P (offset))
6584 /* The base register doesn't really matter, we only want to
6585 test the index for the appropriate mode. */
6586 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6588 gcc_assert (can_create_pseudo_p ());
6589 offset = force_reg (Pmode, offset);
6592 if (CONST_INT_P (offset))
6593 return plus_constant (Pmode, base, INTVAL (offset));
6596 if (GET_MODE_SIZE (mode) > 4
6597 && (GET_MODE_CLASS (mode) == MODE_INT
6598 || TARGET_SOFT_FLOAT))
6600 emit_insn (gen_addsi3 (reg, base, offset));
6601 return reg;
6604 return gen_rtx_PLUS (Pmode, base, offset);
6607 return orig;
6611 /* Find a spare register to use during the prolog of a function. */
6613 static int
6614 thumb_find_work_register (unsigned long pushed_regs_mask)
6616 int reg;
6618 /* Check the argument registers first as these are call-used. The
6619 register allocation order means that sometimes r3 might be used
6620 but earlier argument registers might not, so check them all. */
6621 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6622 if (!df_regs_ever_live_p (reg))
6623 return reg;
6625 /* Before going on to check the call-saved registers we can try a couple
6626 more ways of deducing that r3 is available. The first is when we are
6627 pushing anonymous arguments onto the stack and we have less than 4
6628 registers worth of fixed arguments(*). In this case r3 will be part of
6629 the variable argument list and so we can be sure that it will be
6630 pushed right at the start of the function. Hence it will be available
6631 for the rest of the prologue.
6632 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6633 if (cfun->machine->uses_anonymous_args
6634 && crtl->args.pretend_args_size > 0)
6635 return LAST_ARG_REGNUM;
6637 /* The other case is when we have fixed arguments but less than 4 registers
6638 worth. In this case r3 might be used in the body of the function, but
6639 it is not being used to convey an argument into the function. In theory
6640 we could just check crtl->args.size to see how many bytes are
6641 being passed in argument registers, but it seems that it is unreliable.
6642 Sometimes it will have the value 0 when in fact arguments are being
6643 passed. (See testcase execute/20021111-1.c for an example). So we also
6644 check the args_info.nregs field as well. The problem with this field is
6645 that it makes no allowances for arguments that are passed to the
6646 function but which are not used. Hence we could miss an opportunity
6647 when a function has an unused argument in r3. But it is better to be
6648 safe than to be sorry. */
6649 if (! cfun->machine->uses_anonymous_args
6650 && crtl->args.size >= 0
6651 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6652 && (TARGET_AAPCS_BASED
6653 ? crtl->args.info.aapcs_ncrn < 4
6654 : crtl->args.info.nregs < 4))
6655 return LAST_ARG_REGNUM;
6657 /* Otherwise look for a call-saved register that is going to be pushed. */
6658 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6659 if (pushed_regs_mask & (1 << reg))
6660 return reg;
6662 if (TARGET_THUMB2)
6664 /* Thumb-2 can use high regs. */
6665 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6666 if (pushed_regs_mask & (1 << reg))
6667 return reg;
6669 /* Something went wrong - thumb_compute_save_reg_mask()
6670 should have arranged for a suitable register to be pushed. */
6671 gcc_unreachable ();
6674 static GTY(()) int pic_labelno;
6676 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6677 low register. */
6679 void
6680 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6682 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6684 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6685 return;
6687 gcc_assert (flag_pic);
6689 pic_reg = cfun->machine->pic_reg;
6690 if (TARGET_VXWORKS_RTP)
6692 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6693 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6694 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6696 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6698 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6699 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6701 else
6703 /* We use an UNSPEC rather than a LABEL_REF because this label
6704 never appears in the code stream. */
6706 labelno = GEN_INT (pic_labelno++);
6707 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6708 l1 = gen_rtx_CONST (VOIDmode, l1);
6710 /* On the ARM the PC register contains 'dot + 8' at the time of the
6711 addition, on the Thumb it is 'dot + 4'. */
6712 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6713 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6714 UNSPEC_GOTSYM_OFF);
6715 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6717 if (TARGET_32BIT)
6719 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6721 else /* TARGET_THUMB1 */
6723 if (arm_pic_register != INVALID_REGNUM
6724 && REGNO (pic_reg) > LAST_LO_REGNUM)
6726 /* We will have pushed the pic register, so we should always be
6727 able to find a work register. */
6728 pic_tmp = gen_rtx_REG (SImode,
6729 thumb_find_work_register (saved_regs));
6730 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6731 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6732 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6734 else if (arm_pic_register != INVALID_REGNUM
6735 && arm_pic_register > LAST_LO_REGNUM
6736 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6738 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6739 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6740 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6742 else
6743 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6747 /* Need to emit this whether or not we obey regdecls,
6748 since setjmp/longjmp can cause life info to screw up. */
6749 emit_use (pic_reg);
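/* For the common ELF (non-VxWorks) case the code above typically expands
   to a sequence along the lines of

       ldr     rPIC, .Lpic_offset      @ _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
       add     rPIC, pc, rPIC

   which is why the offset folded into the UNSPEC is 'dot + 8' in ARM state
   and 'dot + 4' in Thumb state.  */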
6752 /* Generate code to load the address of a static var when flag_pic is set. */
6753 static rtx
6754 arm_pic_static_addr (rtx orig, rtx reg)
6756 rtx l1, labelno, offset_rtx, insn;
6758 gcc_assert (flag_pic);
6760 /* We use an UNSPEC rather than a LABEL_REF because this label
6761 never appears in the code stream. */
6762 labelno = GEN_INT (pic_labelno++);
6763 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6764 l1 = gen_rtx_CONST (VOIDmode, l1);
6766 /* On the ARM the PC register contains 'dot + 8' at the time of the
6767 addition, on the Thumb it is 'dot + 4'. */
6768 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6769 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6770 UNSPEC_SYMBOL_OFFSET);
6771 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6773 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6774 return insn;
6777 /* Return nonzero if X is valid as an ARM state addressing register. */
6778 static int
6779 arm_address_register_rtx_p (rtx x, int strict_p)
6781 int regno;
6783 if (!REG_P (x))
6784 return 0;
6786 regno = REGNO (x);
6788 if (strict_p)
6789 return ARM_REGNO_OK_FOR_BASE_P (regno);
6791 return (regno <= LAST_ARM_REGNUM
6792 || regno >= FIRST_PSEUDO_REGISTER
6793 || regno == FRAME_POINTER_REGNUM
6794 || regno == ARG_POINTER_REGNUM);
6797 /* Return TRUE if this rtx is the difference of a symbol and a label,
6798 and will reduce to a PC-relative relocation in the object file.
6799 Expressions like this can be left alone when generating PIC, rather
6800 than forced through the GOT. */
6801 static int
6802 pcrel_constant_p (rtx x)
6804 if (GET_CODE (x) == MINUS)
6805 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6807 return FALSE;
6810 /* Return true if X will surely end up in an index register after the
6811 next splitting pass. */
6812 static bool
6813 will_be_in_index_register (const_rtx x)
6815 /* arm.md: calculate_pic_address will split this into a register. */
6816 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6819 /* Return nonzero if X is a valid ARM state address operand. */
6821 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6822 int strict_p)
6824 bool use_ldrd;
6825 enum rtx_code code = GET_CODE (x);
6827 if (arm_address_register_rtx_p (x, strict_p))
6828 return 1;
6830 use_ldrd = (TARGET_LDRD
6831 && (mode == DImode
6832 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6834 if (code == POST_INC || code == PRE_DEC
6835 || ((code == PRE_INC || code == POST_DEC)
6836 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6837 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6839 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6840 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6841 && GET_CODE (XEXP (x, 1)) == PLUS
6842 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6844 rtx addend = XEXP (XEXP (x, 1), 1);
6846 /* Don't allow ldrd post-increment by register because it's hard
6847 to fix up invalid register choices. */
6848 if (use_ldrd
6849 && GET_CODE (x) == POST_MODIFY
6850 && REG_P (addend))
6851 return 0;
6853 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6854 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6857 /* After reload constants split into minipools will have addresses
6858 from a LABEL_REF. */
6859 else if (reload_completed
6860 && (code == LABEL_REF
6861 || (code == CONST
6862 && GET_CODE (XEXP (x, 0)) == PLUS
6863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6865 return 1;
6867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6868 return 0;
6870 else if (code == PLUS)
6872 rtx xop0 = XEXP (x, 0);
6873 rtx xop1 = XEXP (x, 1);
6875 return ((arm_address_register_rtx_p (xop0, strict_p)
6876 && ((CONST_INT_P (xop1)
6877 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6878 || (!strict_p && will_be_in_index_register (xop1))))
6879 || (arm_address_register_rtx_p (xop1, strict_p)
6880 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6883 #if 0
6884 /* Reload currently can't handle MINUS, so disable this for now */
6885 else if (GET_CODE (x) == MINUS)
6887 rtx xop0 = XEXP (x, 0);
6888 rtx xop1 = XEXP (x, 1);
6890 return (arm_address_register_rtx_p (xop0, strict_p)
6891 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6893 #endif
6895 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6896 && code == SYMBOL_REF
6897 && CONSTANT_POOL_ADDRESS_P (x)
6898 && ! (flag_pic
6899 && symbol_mentioned_p (get_pool_constant (x))
6900 && ! pcrel_constant_p (get_pool_constant (x))))
6901 return 1;
6903 return 0;
6906 /* Return nonzero if X is a valid Thumb-2 address operand. */
6907 static int
6908 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6910 bool use_ldrd;
6911 enum rtx_code code = GET_CODE (x);
6913 if (arm_address_register_rtx_p (x, strict_p))
6914 return 1;
6916 use_ldrd = (TARGET_LDRD
6917 && (mode == DImode
6918 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6920 if (code == POST_INC || code == PRE_DEC
6921 || ((code == PRE_INC || code == POST_DEC)
6922 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6923 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6925 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6926 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6927 && GET_CODE (XEXP (x, 1)) == PLUS
6928 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6930 /* Thumb-2 only has autoincrement by constant. */
6931 rtx addend = XEXP (XEXP (x, 1), 1);
6932 HOST_WIDE_INT offset;
6934 if (!CONST_INT_P (addend))
6935 return 0;
6937 offset = INTVAL(addend);
6938 if (GET_MODE_SIZE (mode) <= 4)
6939 return (offset > -256 && offset < 256);
6941 return (use_ldrd && offset > -1024 && offset < 1024
6942 && (offset & 3) == 0);
6945 /* After reload constants split into minipools will have addresses
6946 from a LABEL_REF. */
6947 else if (reload_completed
6948 && (code == LABEL_REF
6949 || (code == CONST
6950 && GET_CODE (XEXP (x, 0)) == PLUS
6951 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6952 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6953 return 1;
6955 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6956 return 0;
6958 else if (code == PLUS)
6960 rtx xop0 = XEXP (x, 0);
6961 rtx xop1 = XEXP (x, 1);
6963 return ((arm_address_register_rtx_p (xop0, strict_p)
6964 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6965 || (!strict_p && will_be_in_index_register (xop1))))
6966 || (arm_address_register_rtx_p (xop1, strict_p)
6967 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6970 /* Normally we can assign constant values to target registers without
6971 the help of the constant pool. But there are cases where we have to
6972 use the constant pool, for example:
6973 1) assigning a label to a register;
6974 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
6976 A constant pool access of the form:
6977 (set (reg r0) (mem (symbol_ref (".LC0"))))
6978 will cause a literal pool to be used (later, in arm_reorg).
6979 So here we mark such an address as invalid; the compiler will then
6980 adjust it into:
6981 (set (reg r0) (symbol_ref (".LC0")))
6982 (set (reg r0) (mem (reg r0))).
6983 No extra register is required, and (mem (reg r0)) won't cause the use
6984 of literal pools. */
6985 else if (arm_disable_literal_pool && code == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x))
6987 return 0;
6989 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6990 && code == SYMBOL_REF
6991 && CONSTANT_POOL_ADDRESS_P (x)
6992 && ! (flag_pic
6993 && symbol_mentioned_p (get_pool_constant (x))
6994 && ! pcrel_constant_p (get_pool_constant (x))))
6995 return 1;
6997 return 0;
7000 /* Return nonzero if INDEX is valid for an address index operand in
7001 ARM state. */
7002 static int
7003 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7004 int strict_p)
7006 HOST_WIDE_INT range;
7007 enum rtx_code code = GET_CODE (index);
7009 /* Standard coprocessor addressing modes. */
7010 if (TARGET_HARD_FLOAT
7011 && TARGET_VFP
7012 && (mode == SFmode || mode == DFmode))
7013 return (code == CONST_INT && INTVAL (index) < 1024
7014 && INTVAL (index) > -1024
7015 && (INTVAL (index) & 3) == 0);
7017 /* For quad modes, we restrict the constant offset to be slightly less
7018 than what the instruction format permits. We do this because for
7019 quad mode moves, we will actually decompose them into two separate
7020 double-mode reads or writes. INDEX must therefore be a valid
7021 (double-mode) offset and so should INDEX+8. */
7022 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7023 return (code == CONST_INT
7024 && INTVAL (index) < 1016
7025 && INTVAL (index) > -1024
7026 && (INTVAL (index) & 3) == 0);
7028 /* We have no such constraint on double mode offsets, so we permit the
7029 full range of the instruction format. */
7030 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7031 return (code == CONST_INT
7032 && INTVAL (index) < 1024
7033 && INTVAL (index) > -1024
7034 && (INTVAL (index) & 3) == 0);
7036 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7037 return (code == CONST_INT
7038 && INTVAL (index) < 1024
7039 && INTVAL (index) > -1024
7040 && (INTVAL (index) & 3) == 0);
7042 if (arm_address_register_rtx_p (index, strict_p)
7043 && (GET_MODE_SIZE (mode) <= 4))
7044 return 1;
7046 if (mode == DImode || mode == DFmode)
7048 if (code == CONST_INT)
7050 HOST_WIDE_INT val = INTVAL (index);
7052 if (TARGET_LDRD)
7053 return val > -256 && val < 256;
7054 else
7055 return val > -4096 && val < 4092;
7058 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7061 if (GET_MODE_SIZE (mode) <= 4
7062 && ! (arm_arch4
7063 && (mode == HImode
7064 || mode == HFmode
7065 || (mode == QImode && outer == SIGN_EXTEND))))
7067 if (code == MULT)
7069 rtx xiop0 = XEXP (index, 0);
7070 rtx xiop1 = XEXP (index, 1);
7072 return ((arm_address_register_rtx_p (xiop0, strict_p)
7073 && power_of_two_operand (xiop1, SImode))
7074 || (arm_address_register_rtx_p (xiop1, strict_p)
7075 && power_of_two_operand (xiop0, SImode)));
7077 else if (code == LSHIFTRT || code == ASHIFTRT
7078 || code == ASHIFT || code == ROTATERT)
7080 rtx op = XEXP (index, 1);
7082 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7083 && CONST_INT_P (op)
7084 && INTVAL (op) > 0
7085 && INTVAL (op) <= 31);
7089 /* For ARM v4 we may be doing a sign-extend operation during the
7090 load. */
7091 if (arm_arch4)
7093 if (mode == HImode
7094 || mode == HFmode
7095 || (outer == SIGN_EXTEND && mode == QImode))
7096 range = 256;
7097 else
7098 range = 4096;
7100 else
7101 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7103 return (code == CONST_INT
7104 && INTVAL (index) < range
7105 && INTVAL (index) > -range);
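/* The ranges computed above mirror the ARM-state load/store offset ranges:
   for example an SImode access accepts immediates in (-4096, 4096), as in
   "ldr r0, [r1, #4092]", while ARMv4 halfword and signed-byte accesses
   (ldrh/ldrsh/ldrsb) are limited to (-256, 256).  */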
7108 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7109 index operand. i.e. 1, 2, 4 or 8. */
7110 static bool
7111 thumb2_index_mul_operand (rtx op)
7113 HOST_WIDE_INT val;
7115 if (!CONST_INT_P (op))
7116 return false;
7118 val = INTVAL(op);
7119 return (val == 1 || val == 2 || val == 4 || val == 8);
7122 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7123 static int
7124 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7126 enum rtx_code code = GET_CODE (index);
7128 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7129 /* Standard coprocessor addressing modes. */
7130 if (TARGET_HARD_FLOAT
7131 && TARGET_VFP
7132 && (mode == SFmode || mode == DFmode))
7133 return (code == CONST_INT && INTVAL (index) < 1024
7134 /* Thumb-2 allows only > -256 index range for its core register
7135 load/stores. Since we allow SF/DF in core registers, we have
7136 to use the intersection between -256~4096 (core) and -1024~1024
7137 (coprocessor). */
7138 && INTVAL (index) > -256
7139 && (INTVAL (index) & 3) == 0);
7141 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7143 /* For DImode assume values will usually live in core regs
7144 and only allow LDRD addressing modes. */
7145 if (!TARGET_LDRD || mode != DImode)
7146 return (code == CONST_INT
7147 && INTVAL (index) < 1024
7148 && INTVAL (index) > -1024
7149 && (INTVAL (index) & 3) == 0);
7152 /* For quad modes, we restrict the constant offset to be slightly less
7153 than what the instruction format permits. We do this because for
7154 quad mode moves, we will actually decompose them into two separate
7155 double-mode reads or writes. INDEX must therefore be a valid
7156 (double-mode) offset and so should INDEX+8. */
7157 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7158 return (code == CONST_INT
7159 && INTVAL (index) < 1016
7160 && INTVAL (index) > -1024
7161 && (INTVAL (index) & 3) == 0);
7163 /* We have no such constraint on double mode offsets, so we permit the
7164 full range of the instruction format. */
7165 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7166 return (code == CONST_INT
7167 && INTVAL (index) < 1024
7168 && INTVAL (index) > -1024
7169 && (INTVAL (index) & 3) == 0);
7171 if (arm_address_register_rtx_p (index, strict_p)
7172 && (GET_MODE_SIZE (mode) <= 4))
7173 return 1;
7175 if (mode == DImode || mode == DFmode)
7177 if (code == CONST_INT)
7179 HOST_WIDE_INT val = INTVAL (index);
7180 /* ??? Can we assume ldrd for thumb2? */
7181 /* Thumb-2 ldrd only has reg+const addressing modes. */
7182 /* ldrd supports offsets of +-1020.
7183 However the ldr fallback does not. */
7184 return val > -256 && val < 256 && (val & 3) == 0;
7186 else
7187 return 0;
7190 if (code == MULT)
7192 rtx xiop0 = XEXP (index, 0);
7193 rtx xiop1 = XEXP (index, 1);
7195 return ((arm_address_register_rtx_p (xiop0, strict_p)
7196 && thumb2_index_mul_operand (xiop1))
7197 || (arm_address_register_rtx_p (xiop1, strict_p)
7198 && thumb2_index_mul_operand (xiop0)));
7200 else if (code == ASHIFT)
7202 rtx op = XEXP (index, 1);
7204 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7205 && CONST_INT_P (op)
7206 && INTVAL (op) > 0
7207 && INTVAL (op) <= 3);
7210 return (code == CONST_INT
7211 && INTVAL (index) < 4096
7212 && INTVAL (index) > -256);
7215 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7216 static int
7217 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7219 int regno;
7221 if (!REG_P (x))
7222 return 0;
7224 regno = REGNO (x);
7226 if (strict_p)
7227 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7229 return (regno <= LAST_LO_REGNUM
7230 || regno > LAST_VIRTUAL_REGISTER
7231 || regno == FRAME_POINTER_REGNUM
7232 || (GET_MODE_SIZE (mode) >= 4
7233 && (regno == STACK_POINTER_REGNUM
7234 || regno >= FIRST_PSEUDO_REGISTER
7235 || x == hard_frame_pointer_rtx
7236 || x == arg_pointer_rtx)));
7239 /* Return nonzero if x is a legitimate index register. This is the case
7240 for any base register that can access a QImode object. */
7241 inline static int
7242 thumb1_index_register_rtx_p (rtx x, int strict_p)
7244 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7247 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7249 The AP may be eliminated to either the SP or the FP, so we use the
7250 least common denominator, e.g. SImode, and offsets from 0 to 64.
7252 ??? Verify whether the above is the right approach.
7254 ??? Also, the FP may be eliminated to the SP, so perhaps that
7255 needs special handling also.
7257 ??? Look at how the mips16 port solves this problem. It probably uses
7258 better ways to solve some of these problems.
7260 Although it is not incorrect, we don't accept QImode and HImode
7261 addresses based on the frame pointer or arg pointer until the
7262 reload pass starts. This is so that eliminating such addresses
7263 into stack based ones won't produce impossible code. */
7265 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7267 /* ??? Not clear if this is right. Experiment. */
7268 if (GET_MODE_SIZE (mode) < 4
7269 && !(reload_in_progress || reload_completed)
7270 && (reg_mentioned_p (frame_pointer_rtx, x)
7271 || reg_mentioned_p (arg_pointer_rtx, x)
7272 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7273 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7274 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7275 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7276 return 0;
7278 /* Accept any base register. SP only in SImode or larger. */
7279 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7280 return 1;
7282 /* This is PC relative data before arm_reorg runs. */
7283 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7284 && GET_CODE (x) == SYMBOL_REF
7285 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7286 return 1;
7288 /* This is PC relative data after arm_reorg runs. */
7289 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7290 && reload_completed
7291 && (GET_CODE (x) == LABEL_REF
7292 || (GET_CODE (x) == CONST
7293 && GET_CODE (XEXP (x, 0)) == PLUS
7294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7295 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7296 return 1;
7298 /* Post-inc indexing only supported for SImode and larger. */
7299 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7300 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7301 return 1;
7303 else if (GET_CODE (x) == PLUS)
7305 /* REG+REG address can be any two index registers. */
7306 /* We disallow FRAME+REG addressing since we know that FRAME
7307 will be replaced with STACK, and SP relative addressing only
7308 permits SP+OFFSET. */
7309 if (GET_MODE_SIZE (mode) <= 4
7310 && XEXP (x, 0) != frame_pointer_rtx
7311 && XEXP (x, 1) != frame_pointer_rtx
7312 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7313 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7314 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7315 return 1;
7317 /* REG+const has 5-7 bit offset for non-SP registers. */
7318 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7319 || XEXP (x, 0) == arg_pointer_rtx)
7320 && CONST_INT_P (XEXP (x, 1))
7321 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7322 return 1;
7324 /* REG+const has 10-bit offset for SP, but only SImode and
7325 larger are supported. */
7326 /* ??? Should probably check for DI/DFmode overflow here
7327 just like GO_IF_LEGITIMATE_OFFSET does. */
7328 else if (REG_P (XEXP (x, 0))
7329 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7330 && GET_MODE_SIZE (mode) >= 4
7331 && CONST_INT_P (XEXP (x, 1))
7332 && INTVAL (XEXP (x, 1)) >= 0
7333 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7334 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7335 return 1;
7337 else if (REG_P (XEXP (x, 0))
7338 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7339 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7340 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7341 && REGNO (XEXP (x, 0))
7342 <= LAST_VIRTUAL_POINTER_REGISTER))
7343 && GET_MODE_SIZE (mode) >= 4
7344 && CONST_INT_P (XEXP (x, 1))
7345 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7346 return 1;
7349 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7350 && GET_MODE_SIZE (mode) == 4
7351 && GET_CODE (x) == SYMBOL_REF
7352 && CONSTANT_POOL_ADDRESS_P (x)
7353 && ! (flag_pic
7354 && symbol_mentioned_p (get_pool_constant (x))
7355 && ! pcrel_constant_p (get_pool_constant (x))))
7356 return 1;
7358 return 0;
7361 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7362 instruction of mode MODE. */
7364 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7366 switch (GET_MODE_SIZE (mode))
7368 case 1:
7369 return val >= 0 && val < 32;
7371 case 2:
7372 return val >= 0 && val < 64 && (val & 1) == 0;
7374 default:
7375 return (val >= 0
7376 && (val + GET_MODE_SIZE (mode)) <= 128
7377 && (val & 3) == 0);
7381 bool
7382 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7384 if (TARGET_ARM)
7385 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7386 else if (TARGET_THUMB2)
7387 return thumb2_legitimate_address_p (mode, x, strict_p);
7388 else /* if (TARGET_THUMB1) */
7389 return thumb1_legitimate_address_p (mode, x, strict_p);
7392 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7394 Given an rtx X being reloaded into a reg required to be
7395 in class CLASS, return the class of reg to actually use.
7396 In general this is just CLASS, but for the Thumb core registers and
7397 immediate constants we prefer a LO_REGS class or a subset. */
7399 static reg_class_t
7400 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7402 if (TARGET_32BIT)
7403 return rclass;
7404 else
7406 if (rclass == GENERAL_REGS)
7407 return LO_REGS;
7408 else
7409 return rclass;
7413 /* Build the SYMBOL_REF for __tls_get_addr. */
7415 static GTY(()) rtx tls_get_addr_libfunc;
7417 static rtx
7418 get_tls_get_addr (void)
7420 if (!tls_get_addr_libfunc)
7421 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7422 return tls_get_addr_libfunc;
7426 arm_load_tp (rtx target)
7428 if (!target)
7429 target = gen_reg_rtx (SImode);
7431 if (TARGET_HARD_TP)
7433 /* Can return in any reg. */
7434 emit_insn (gen_load_tp_hard (target));
7436 else
7438 /* Always returned in r0. Immediately copy the result into a pseudo;
7439 otherwise other uses of r0 (e.g. setting up function arguments) may
7440 clobber the value. */
7442 rtx tmp;
7444 emit_insn (gen_load_tp_soft ());
7446 tmp = gen_rtx_REG (SImode, 0);
7447 emit_move_insn (target, tmp);
7449 return target;
7452 static rtx
7453 load_tls_operand (rtx x, rtx reg)
7455 rtx tmp;
7457 if (reg == NULL_RTX)
7458 reg = gen_reg_rtx (SImode);
7460 tmp = gen_rtx_CONST (SImode, x);
7462 emit_move_insn (reg, tmp);
7464 return reg;
7467 static rtx
7468 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7470 rtx insns, label, labelno, sum;
7472 gcc_assert (reloc != TLS_DESCSEQ);
7473 start_sequence ();
7475 labelno = GEN_INT (pic_labelno++);
7476 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7477 label = gen_rtx_CONST (VOIDmode, label);
7479 sum = gen_rtx_UNSPEC (Pmode,
7480 gen_rtvec (4, x, GEN_INT (reloc), label,
7481 GEN_INT (TARGET_ARM ? 8 : 4)),
7482 UNSPEC_TLS);
7483 reg = load_tls_operand (sum, reg);
7485 if (TARGET_ARM)
7486 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7487 else
7488 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7490 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7491 LCT_PURE, /* LCT_CONST? */
7492 Pmode, 1, reg, Pmode);
7494 insns = get_insns ();
7495 end_sequence ();
7497 return insns;
7500 static rtx
7501 arm_tls_descseq_addr (rtx x, rtx reg)
7503 rtx labelno = GEN_INT (pic_labelno++);
7504 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7505 rtx sum = gen_rtx_UNSPEC (Pmode,
7506 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7507 gen_rtx_CONST (VOIDmode, label),
7508 GEN_INT (!TARGET_ARM)),
7509 UNSPEC_TLS);
7510 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7512 emit_insn (gen_tlscall (x, labelno));
7513 if (!reg)
7514 reg = gen_reg_rtx (SImode);
7515 else
7516 gcc_assert (REGNO (reg) != 0);
7518 emit_move_insn (reg, reg0);
7520 return reg;
7524 legitimize_tls_address (rtx x, rtx reg)
7526 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7527 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7529 switch (model)
7531 case TLS_MODEL_GLOBAL_DYNAMIC:
7532 if (TARGET_GNU2_TLS)
7534 reg = arm_tls_descseq_addr (x, reg);
7536 tp = arm_load_tp (NULL_RTX);
7538 dest = gen_rtx_PLUS (Pmode, tp, reg);
7540 else
7542 /* Original scheme */
7543 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7544 dest = gen_reg_rtx (Pmode);
7545 emit_libcall_block (insns, dest, ret, x);
7547 return dest;
7549 case TLS_MODEL_LOCAL_DYNAMIC:
7550 if (TARGET_GNU2_TLS)
7552 reg = arm_tls_descseq_addr (x, reg);
7554 tp = arm_load_tp (NULL_RTX);
7556 dest = gen_rtx_PLUS (Pmode, tp, reg);
7558 else
7560 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7562 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7563 share the LDM result with other LD model accesses. */
7564 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7565 UNSPEC_TLS);
7566 dest = gen_reg_rtx (Pmode);
7567 emit_libcall_block (insns, dest, ret, eqv);
7569 /* Load the addend. */
7570 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7571 GEN_INT (TLS_LDO32)),
7572 UNSPEC_TLS);
7573 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7574 dest = gen_rtx_PLUS (Pmode, dest, addend);
7576 return dest;
7578 case TLS_MODEL_INITIAL_EXEC:
7579 labelno = GEN_INT (pic_labelno++);
7580 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7581 label = gen_rtx_CONST (VOIDmode, label);
7582 sum = gen_rtx_UNSPEC (Pmode,
7583 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7584 GEN_INT (TARGET_ARM ? 8 : 4)),
7585 UNSPEC_TLS);
7586 reg = load_tls_operand (sum, reg);
7588 if (TARGET_ARM)
7589 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7590 else if (TARGET_THUMB2)
7591 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7592 else
7594 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7595 emit_move_insn (reg, gen_const_mem (SImode, reg));
7598 tp = arm_load_tp (NULL_RTX);
7600 return gen_rtx_PLUS (Pmode, tp, reg);
7602 case TLS_MODEL_LOCAL_EXEC:
7603 tp = arm_load_tp (NULL_RTX);
7605 reg = gen_rtx_UNSPEC (Pmode,
7606 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7607 UNSPEC_TLS);
7608 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7610 return gen_rtx_PLUS (Pmode, tp, reg);
7612 default:
7613 abort ();
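/* Annotation (not part of the original source): a rough summary of the
   address forms built above, derived only from the code in this function.
   Global and local dynamic: with TARGET_GNU2_TLS the result is
   tp + <descriptor-sequence value>; otherwise it is the value returned by
   a __tls_get_addr libcall (local dynamic additionally adds a TLS_LDO32
   addend).  Initial exec: tp + a PC-relative load of the TLS_IE32 entry.
   Local exec: tp + an UNSPEC_TLS (x, TLS_LE32) constant.  */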
7617 /* Try machine-dependent ways of modifying an illegitimate address
7618 to be legitimate. If we find one, return the new, valid address. */
7620 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7622 if (arm_tls_referenced_p (x))
7624 rtx addend = NULL;
7626 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7628 addend = XEXP (XEXP (x, 0), 1);
7629 x = XEXP (XEXP (x, 0), 0);
7632 if (GET_CODE (x) != SYMBOL_REF)
7633 return x;
7635 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7637 x = legitimize_tls_address (x, NULL_RTX);
7639 if (addend)
7641 x = gen_rtx_PLUS (SImode, x, addend);
7642 orig_x = x;
7644 else
7645 return x;
7648 if (!TARGET_ARM)
7650 /* TODO: legitimize_address for Thumb2. */
7651 if (TARGET_THUMB2)
7652 return x;
7653 return thumb_legitimize_address (x, orig_x, mode);
7656 if (GET_CODE (x) == PLUS)
7658 rtx xop0 = XEXP (x, 0);
7659 rtx xop1 = XEXP (x, 1);
7661 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7662 xop0 = force_reg (SImode, xop0);
7664 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7665 && !symbol_mentioned_p (xop1))
7666 xop1 = force_reg (SImode, xop1);
7668 if (ARM_BASE_REGISTER_RTX_P (xop0)
7669 && CONST_INT_P (xop1))
7671 HOST_WIDE_INT n, low_n;
7672 rtx base_reg, val;
7673 n = INTVAL (xop1);
7675 /* VFP addressing modes actually allow greater offsets, but for
7676 now we just stick with the lowest common denominator. */
7677 if (mode == DImode
7678 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7680 low_n = n & 0x0f;
7681 n &= ~0x0f;
7682 if (low_n > 4)
7684 n += 16;
7685 low_n -= 16;
7688 else
7690 low_n = ((mode) == TImode ? 0
7691 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7692 n -= low_n;
7695 base_reg = gen_reg_rtx (SImode);
7696 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7697 emit_move_insn (base_reg, val);
7698 x = plus_constant (Pmode, base_reg, low_n);
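/* Annotation (not part of the original source): two worked examples of
   the split above, using hypothetical offsets.  SImode with n = 0x1234:
   low_n = 0x234 and n becomes 0x1000, so the address is rewritten as
   (xop0 + 0x1000) + 0x234, keeping the low part within the 12-bit
   ldr/str offset field.  DImode with n = 0x10b: low_n = 0xb exceeds 4,
   so n is bumped to 0x110 and low_n becomes -5, i.e. (xop0 + 0x110) - 5,
   keeping the residual offset small for the ldrd/VFP-style forms.  */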
7700 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7701 x = gen_rtx_PLUS (SImode, xop0, xop1);
7704 /* XXX We don't allow MINUS any more -- see comment in
7705 arm_legitimate_address_outer_p (). */
7706 else if (GET_CODE (x) == MINUS)
7708 rtx xop0 = XEXP (x, 0);
7709 rtx xop1 = XEXP (x, 1);
7711 if (CONSTANT_P (xop0))
7712 xop0 = force_reg (SImode, xop0);
7714 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7715 xop1 = force_reg (SImode, xop1);
7717 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7718 x = gen_rtx_MINUS (SImode, xop0, xop1);
7721 /* Make sure to take full advantage of the pre-indexed addressing mode
7722 with absolute addresses which often allows for the base register to
7723 be factorized for multiple adjacent memory references, and it might
7724 even allow for the minipool to be avoided entirely. */
7725 else if (CONST_INT_P (x) && optimize > 0)
7727 unsigned int bits;
7728 HOST_WIDE_INT mask, base, index;
7729 rtx base_reg;
7731 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7732 use an 8-bit index. So let's use a 12-bit index for SImode only and
7733 hope that arm_gen_constant will enable ldrb to use more bits. */
7734 bits = (mode == SImode) ? 12 : 8;
7735 mask = (1 << bits) - 1;
7736 base = INTVAL (x) & ~mask;
7737 index = INTVAL (x) & mask;
7738 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7740 /* It'll most probably be more efficient to generate the base
7741 with more bits set and use a negative index instead. */
7742 base |= mask;
7743 index -= mask;
7745 base_reg = force_reg (SImode, GEN_INT (base));
7746 x = plus_constant (Pmode, base_reg, index);
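/* Annotation (not part of the original source): a worked example of the
   heuristic above, using a hypothetical address.  For x = 0x00fff7ff in
   SImode, bits = 12 gives base = 0x00fff000 and index = 0x7ff; the base
   has 12 bits set, more than (32 - 12)/2 = 10, so it becomes
   base = 0x00ffffff and index = -0x800.  The new base is ~0xff000000 and
   can usually be built with a single MVN, whereas 0x00fff000 needs two
   instructions, and -0x800 still fits the signed 12-bit ldr/str offset.  */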
7749 if (flag_pic)
7751 /* We need to find and carefully transform any SYMBOL and LABEL
7752 references; so go back to the original address expression. */
7753 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7755 if (new_x != orig_x)
7756 x = new_x;
7759 return x;
7763 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7764 to be legitimate. If we find one, return the new, valid address. */
7766 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7768 if (GET_CODE (x) == PLUS
7769 && CONST_INT_P (XEXP (x, 1))
7770 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7771 || INTVAL (XEXP (x, 1)) < 0))
7773 rtx xop0 = XEXP (x, 0);
7774 rtx xop1 = XEXP (x, 1);
7775 HOST_WIDE_INT offset = INTVAL (xop1);
7777 /* Try and fold the offset into a biasing of the base register and
7778 then offsetting that. Don't do this when optimizing for space
7779 since it can cause too many CSEs. */
7780 if (optimize_size && offset >= 0
7781 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7783 HOST_WIDE_INT delta;
7785 if (offset >= 256)
7786 delta = offset - (256 - GET_MODE_SIZE (mode));
7787 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7788 delta = 31 * GET_MODE_SIZE (mode);
7789 else
7790 delta = offset & (~31 * GET_MODE_SIZE (mode));
7792 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7793 NULL_RTX);
7794 x = plus_constant (Pmode, xop0, delta);
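/* Annotation (not part of the original source): a worked example of the
   folding above, using a hypothetical offset.  For an SImode access at
   base + 260, when the optimize_size branch above is taken, offset >= 256
   so delta = 260 - (256 - 4) = 8; the base register is biased by
   260 - 8 = 252 (which fits an 8-bit Thumb add immediate) and the access
   becomes [biased_base, #8], an offset thumb_legitimate_offset_p accepts.  */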
7796 else if (offset < 0 && offset > -256)
7797 /* Small negative offsets are best done with a subtract before the
7798 dereference; forcing these into a register normally takes two
7799 instructions. */
7800 x = force_operand (x, NULL_RTX);
7801 else
7803 /* For the remaining cases, force the constant into a register. */
7804 xop1 = force_reg (SImode, xop1);
7805 x = gen_rtx_PLUS (SImode, xop0, xop1);
7808 else if (GET_CODE (x) == PLUS
7809 && s_register_operand (XEXP (x, 1), SImode)
7810 && !s_register_operand (XEXP (x, 0), SImode))
7812 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7814 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7817 if (flag_pic)
7819 /* We need to find and carefully transform any SYMBOL and LABEL
7820 references; so go back to the original address expression. */
7821 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7823 if (new_x != orig_x)
7824 x = new_x;
7827 return x;
7830 bool
7831 arm_legitimize_reload_address (rtx *p,
7832 machine_mode mode,
7833 int opnum, int type,
7834 int ind_levels ATTRIBUTE_UNUSED)
7836 /* We must recognize output that we have already generated ourselves. */
7837 if (GET_CODE (*p) == PLUS
7838 && GET_CODE (XEXP (*p, 0)) == PLUS
7839 && REG_P (XEXP (XEXP (*p, 0), 0))
7840 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7841 && CONST_INT_P (XEXP (*p, 1)))
7843 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7844 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7845 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7846 return true;
7849 if (GET_CODE (*p) == PLUS
7850 && REG_P (XEXP (*p, 0))
7851 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7852 /* If the base register is equivalent to a constant, let the generic
7853 code handle it. Otherwise we will run into problems if a future
7854 reload pass decides to rematerialize the constant. */
7855 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7856 && CONST_INT_P (XEXP (*p, 1)))
7858 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7859 HOST_WIDE_INT low, high;
7861 /* Detect coprocessor load/stores. */
7862 bool coproc_p = ((TARGET_HARD_FLOAT
7863 && TARGET_VFP
7864 && (mode == SFmode || mode == DFmode))
7865 || (TARGET_REALLY_IWMMXT
7866 && VALID_IWMMXT_REG_MODE (mode))
7867 || (TARGET_NEON
7868 && (VALID_NEON_DREG_MODE (mode)
7869 || VALID_NEON_QREG_MODE (mode))));
7871 /* In some cases, bail out when the low two bits of the offset are nonzero (unaligned). */
7872 if ((val & 0x3) != 0
7873 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7874 && (coproc_p
7875 /* For DI, and DF under soft-float: */
7876 || ((mode == DImode || mode == DFmode)
7877 /* Without ldrd, we use stm/ldm, which does not
7878 fare well with unaligned bits. */
7879 && (! TARGET_LDRD
7880 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7881 || TARGET_THUMB2))))
7882 return false;
7884 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7885 where the (reg+high) part gets turned into a reload add insn,
7886 we try to decompose the index into high/low values that can often
7887 also lead to better reload CSE.
7888 For example:
7889 ldr r0, [r2, #4100] // Offset too large
7890 ldr r1, [r2, #4104] // Offset too large
7892 is best reloaded as:
7893 add t1, r2, #4096
7894 ldr r0, [t1, #4]
7895 add t2, r2, #4096
7896 ldr r1, [t2, #8]
7898 which post-reload CSE can simplify in most cases to eliminate the
7899 second add instruction:
7900 add t1, r2, #4096
7901 ldr r0, [t1, #4]
7902 ldr r1, [t1, #8]
7904 The idea here is that we want to split out the bits of the constant
7905 as a mask, rather than by subtracting the maximum offset that the
7906 respective type of load/store instruction can handle.
7908 A negative low part can still be used even when the overall
7909 offset is positive; sometimes this leads to an immediate
7910 that can be constructed with fewer instructions.
7911 For example:
7912 ldr r0, [r2, #0x3FFFFC]
7914 This is best reloaded as:
7915 add t1, r2, #0x400000
7916 ldr r0, [t1, #-4]
7918 The trick for spotting this for a load insn with N bits of offset
7919 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7920 negative offset that is going to make bit N and all the bits below
7921 it become zero in the remainder part.
7923 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7924 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7925 used in most cases of ARM load/store instructions. */
7927 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7928 (((VAL) & ((1 << (N)) - 1)) \
7929 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7930 : 0)
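/* Annotation (not part of the original source): a worked instance of the
   macro above, matching the 0x3ffffc example in the comment.
   SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12): bits 11:0 are 0xffc (nonzero),
   so the result is ((0x3ffffc & 0x1fff) ^ 0x1000) - 0x1000
   = (0x1ffc ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4.  The high part is
   then 0x3ffffc - (-4) = 0x400000, giving the add #0x400000 / ldr #-4
   sequence described above.  */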
7932 if (coproc_p)
7934 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7936 /* NEON quad-word load/stores are made of two double-word accesses,
7937 so the valid index range is reduced by 8. Treat as 9-bit range if
7938 we go over it. */
7939 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7940 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7942 else if (GET_MODE_SIZE (mode) == 8)
7944 if (TARGET_LDRD)
7945 low = (TARGET_THUMB2
7946 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7947 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7948 else
7949 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7950 to access doublewords. The supported load/store offsets are
7951 -8, -4, and 4, which we try to produce here. */
7952 low = ((val & 0xf) ^ 0x8) - 0x8;
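/* Annotation (not part of the original source): the expression above
   sign-extends the low nibble of val into [-8, 7].  For example a low
   nibble of 0x4 gives (0x4 ^ 0x8) - 0x8 = 4, and 0xc gives
   (0xc ^ 0x8) - 0x8 = -4, matching the +4/-4/-8 offsets mentioned in the
   comment.  */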
7954 else if (GET_MODE_SIZE (mode) < 8)
7956 /* NEON element load/stores do not have an offset. */
7957 if (TARGET_NEON_FP16 && mode == HFmode)
7958 return false;
7960 if (TARGET_THUMB2)
7962 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7963 Try the wider 12-bit range first, and re-try if the result
7964 is out of range. */
7965 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7966 if (low < -255)
7967 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7969 else
7971 if (mode == HImode || mode == HFmode)
7973 if (arm_arch4)
7974 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7975 else
7977 /* The storehi/movhi_bytes fallbacks can use only
7978 [-4094,+4094] of the full ldrb/strb index range. */
7979 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7980 if (low == 4095 || low == -4095)
7981 return false;
7984 else
7985 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7988 else
7989 return false;
7991 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7992 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7993 - (unsigned HOST_WIDE_INT) 0x80000000);
7994 /* Check for overflow or zero */
7995 if (low == 0 || high == 0 || (high + low != val))
7996 return false;
7998 /* Reload the high part into a base reg; leave the low part
7999 in the mem.
8000 Note that replacing this gen_rtx_PLUS with plus_constant is
8001 wrong in this case because we rely on the
8002 (plus (plus reg c1) c2) structure being preserved so that
8003 XEXP (*p, 0) in push_reload below uses the correct term. */
8004 *p = gen_rtx_PLUS (GET_MODE (*p),
8005 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8006 GEN_INT (high)),
8007 GEN_INT (low));
8008 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8009 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8010 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8011 return true;
8014 return false;
8018 thumb_legitimize_reload_address (rtx *x_p,
8019 machine_mode mode,
8020 int opnum, int type,
8021 int ind_levels ATTRIBUTE_UNUSED)
8023 rtx x = *x_p;
8025 if (GET_CODE (x) == PLUS
8026 && GET_MODE_SIZE (mode) < 4
8027 && REG_P (XEXP (x, 0))
8028 && XEXP (x, 0) == stack_pointer_rtx
8029 && CONST_INT_P (XEXP (x, 1))
8030 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8032 rtx orig_x = x;
8034 x = copy_rtx (x);
8035 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8036 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8037 return x;
8040 /* If both registers are hi-regs, then it's better to reload the
8041 entire expression rather than each register individually. That
8042 only requires one reload register rather than two. */
8043 if (GET_CODE (x) == PLUS
8044 && REG_P (XEXP (x, 0))
8045 && REG_P (XEXP (x, 1))
8046 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8047 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8049 rtx orig_x = x;
8051 x = copy_rtx (x);
8052 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8053 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8054 return x;
8057 return NULL;
8060 /* Return TRUE if X contains any TLS symbol references. */
8062 bool
8063 arm_tls_referenced_p (rtx x)
8065 if (! TARGET_HAVE_TLS)
8066 return false;
8068 subrtx_iterator::array_type array;
8069 FOR_EACH_SUBRTX (iter, array, x, ALL)
8071 const_rtx x = *iter;
8072 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8073 return true;
8075 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8076 TLS offsets, not real symbol references. */
8077 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8078 iter.skip_subrtxes ();
8080 return false;
8083 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8085 On the ARM, allow any integer (invalid ones are removed later by insn
8086 patterns), nice doubles and symbol_refs which refer to the function's
8087 constant pool XXX.
8089 When generating pic allow anything. */
8091 static bool
8092 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8094 /* At present, we have no support for Neon structure constants, so forbid
8095 them here. It might be possible to handle simple cases like 0 and -1
8096 in future. */
8097 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8098 return false;
8100 return flag_pic || !label_mentioned_p (x);
8103 static bool
8104 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8106 return (CONST_INT_P (x)
8107 || CONST_DOUBLE_P (x)
8108 || CONSTANT_ADDRESS_P (x)
8109 || flag_pic);
8112 static bool
8113 arm_legitimate_constant_p (machine_mode mode, rtx x)
8115 return (!arm_cannot_force_const_mem (mode, x)
8116 && (TARGET_32BIT
8117 ? arm_legitimate_constant_p_1 (mode, x)
8118 : thumb_legitimate_constant_p (mode, x)));
8121 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8123 static bool
8124 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8126 rtx base, offset;
8128 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8130 split_const (x, &base, &offset);
8131 if (GET_CODE (base) == SYMBOL_REF
8132 && !offset_within_block_p (base, INTVAL (offset)))
8133 return true;
8135 return arm_tls_referenced_p (x);
8138 #define REG_OR_SUBREG_REG(X) \
8139 (REG_P (X) \
8140 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8142 #define REG_OR_SUBREG_RTX(X) \
8143 (REG_P (X) ? (X) : SUBREG_REG (X))
8145 static inline int
8146 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8148 machine_mode mode = GET_MODE (x);
8149 int total, words;
8151 switch (code)
8153 case ASHIFT:
8154 case ASHIFTRT:
8155 case LSHIFTRT:
8156 case ROTATERT:
8157 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8159 case PLUS:
8160 case MINUS:
8161 case COMPARE:
8162 case NEG:
8163 case NOT:
8164 return COSTS_N_INSNS (1);
8166 case MULT:
8167 if (CONST_INT_P (XEXP (x, 1)))
8169 int cycles = 0;
8170 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8172 while (i)
8174 i >>= 2;
8175 cycles++;
8177 return COSTS_N_INSNS (2) + cycles;
8179 return COSTS_N_INSNS (1) + 16;
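/* Annotation (not part of the original source): the loop above charges
   one extra "cycle" per two bits of the constant multiplier.  For
   example a multiplier of 0x55 takes four iterations
   (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), so the estimate is
   COSTS_N_INSNS (2) + 4; wider constants are costed proportionally
   higher.  */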
8181 case SET:
8182 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8183 the mode. */
8184 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8185 return (COSTS_N_INSNS (words)
8186 + 4 * ((MEM_P (SET_SRC (x)))
8187 + MEM_P (SET_DEST (x))));
8189 case CONST_INT:
8190 if (outer == SET)
8192 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8193 return 0;
8194 if (thumb_shiftable_const (INTVAL (x)))
8195 return COSTS_N_INSNS (2);
8196 return COSTS_N_INSNS (3);
8198 else if ((outer == PLUS || outer == COMPARE)
8199 && INTVAL (x) < 256 && INTVAL (x) > -256)
8200 return 0;
8201 else if ((outer == IOR || outer == XOR || outer == AND)
8202 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8203 return COSTS_N_INSNS (1);
8204 else if (outer == AND)
8206 int i;
8207 /* This duplicates the tests in the andsi3 expander. */
8208 for (i = 9; i <= 31; i++)
8209 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8210 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8211 return COSTS_N_INSNS (2);
8213 else if (outer == ASHIFT || outer == ASHIFTRT
8214 || outer == LSHIFTRT)
8215 return 0;
8216 return COSTS_N_INSNS (2);
8218 case CONST:
8219 case CONST_DOUBLE:
8220 case LABEL_REF:
8221 case SYMBOL_REF:
8222 return COSTS_N_INSNS (3);
8224 case UDIV:
8225 case UMOD:
8226 case DIV:
8227 case MOD:
8228 return 100;
8230 case TRUNCATE:
8231 return 99;
8233 case AND:
8234 case XOR:
8235 case IOR:
8236 /* XXX guess. */
8237 return 8;
8239 case MEM:
8240 /* XXX another guess. */
8241 /* Memory costs quite a lot for the first word, but subsequent words
8242 load at the equivalent of a single insn each. */
8243 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8244 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8245 ? 4 : 0));
8247 case IF_THEN_ELSE:
8248 /* XXX a guess. */
8249 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8250 return 14;
8251 return 2;
8253 case SIGN_EXTEND:
8254 case ZERO_EXTEND:
8255 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8256 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8258 if (mode == SImode)
8259 return total;
8261 if (arm_arch6)
8262 return total + COSTS_N_INSNS (1);
8264 /* Assume a two-shift sequence. Increase the cost slightly so
8265 we prefer actual shifts over an extend operation. */
8266 return total + 1 + COSTS_N_INSNS (2);
8268 default:
8269 return 99;
8273 static inline bool
8274 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8276 machine_mode mode = GET_MODE (x);
8277 enum rtx_code subcode;
8278 rtx operand;
8279 enum rtx_code code = GET_CODE (x);
8280 *total = 0;
8282 switch (code)
8284 case MEM:
8285 /* Memory costs quite a lot for the first word, but subsequent words
8286 load at the equivalent of a single insn each. */
8287 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8288 return true;
8290 case DIV:
8291 case MOD:
8292 case UDIV:
8293 case UMOD:
8294 if (TARGET_HARD_FLOAT && mode == SFmode)
8295 *total = COSTS_N_INSNS (2);
8296 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8297 *total = COSTS_N_INSNS (4);
8298 else
8299 *total = COSTS_N_INSNS (20);
8300 return false;
8302 case ROTATE:
8303 if (REG_P (XEXP (x, 1)))
8304 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8305 else if (!CONST_INT_P (XEXP (x, 1)))
8306 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8308 /* Fall through */
8309 case ROTATERT:
8310 if (mode != SImode)
8312 *total += COSTS_N_INSNS (4);
8313 return true;
8316 /* Fall through */
8317 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8318 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8319 if (mode == DImode)
8321 *total += COSTS_N_INSNS (3);
8322 return true;
8325 *total += COSTS_N_INSNS (1);
8326 /* Increase the cost of complex shifts because they aren't any faster,
8327 and reduce dual issue opportunities. */
8328 if (arm_tune_cortex_a9
8329 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8330 ++*total;
8332 return true;
8334 case MINUS:
8335 if (mode == DImode)
8337 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8338 if (CONST_INT_P (XEXP (x, 0))
8339 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8341 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8342 return true;
8345 if (CONST_INT_P (XEXP (x, 1))
8346 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8348 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8349 return true;
8352 return false;
8355 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8357 if (TARGET_HARD_FLOAT
8358 && (mode == SFmode
8359 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8361 *total = COSTS_N_INSNS (1);
8362 if (CONST_DOUBLE_P (XEXP (x, 0))
8363 && arm_const_double_rtx (XEXP (x, 0)))
8365 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8366 return true;
8369 if (CONST_DOUBLE_P (XEXP (x, 1))
8370 && arm_const_double_rtx (XEXP (x, 1)))
8372 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8373 return true;
8376 return false;
8378 *total = COSTS_N_INSNS (20);
8379 return false;
8382 *total = COSTS_N_INSNS (1);
8383 if (CONST_INT_P (XEXP (x, 0))
8384 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 return true;
8390 subcode = GET_CODE (XEXP (x, 1));
8391 if (subcode == ASHIFT || subcode == ASHIFTRT
8392 || subcode == LSHIFTRT
8393 || subcode == ROTATE || subcode == ROTATERT)
8395 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8396 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8397 return true;
8400 /* A shift as a part of RSB costs no more than RSB itself. */
8401 if (GET_CODE (XEXP (x, 0)) == MULT
8402 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8404 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8405 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8406 return true;
8409 if (subcode == MULT
8410 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8412 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8413 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8414 return true;
8417 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8418 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8420 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8421 if (REG_P (XEXP (XEXP (x, 1), 0))
8422 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8423 *total += COSTS_N_INSNS (1);
8425 return true;
8428 /* Fall through */
8430 case PLUS:
8431 if (code == PLUS && arm_arch6 && mode == SImode
8432 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8433 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8435 *total = COSTS_N_INSNS (1);
8436 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8437 0, speed);
8438 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8439 return true;
8442 /* MLA: All arguments must be registers. We filter out
8443 multiplication by a power of two, so that we fall down into
8444 the code below. */
8445 if (GET_CODE (XEXP (x, 0)) == MULT
8446 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8448 /* The cost comes from the cost of the multiply. */
8449 return false;
8452 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8454 if (TARGET_HARD_FLOAT
8455 && (mode == SFmode
8456 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8458 *total = COSTS_N_INSNS (1);
8459 if (CONST_DOUBLE_P (XEXP (x, 1))
8460 && arm_const_double_rtx (XEXP (x, 1)))
8462 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8463 return true;
8466 return false;
8469 *total = COSTS_N_INSNS (20);
8470 return false;
8473 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8474 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8476 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8477 if (REG_P (XEXP (XEXP (x, 0), 0))
8478 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8479 *total += COSTS_N_INSNS (1);
8480 return true;
8483 /* Fall through */
8485 case AND: case XOR: case IOR:
8487 /* Normally the frame registers will be split into reg+const during
8488 reload, so it is a bad idea to combine them with other instructions,
8489 since then they might not be moved outside of loops. As a compromise
8490 we allow integration with ops that have a constant as their second
8491 operand. */
8492 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8493 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8494 && !CONST_INT_P (XEXP (x, 1)))
8495 *total = COSTS_N_INSNS (1);
8497 if (mode == DImode)
8499 *total += COSTS_N_INSNS (2);
8500 if (CONST_INT_P (XEXP (x, 1))
8501 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8503 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8504 return true;
8507 return false;
8510 *total += COSTS_N_INSNS (1);
8511 if (CONST_INT_P (XEXP (x, 1))
8512 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8514 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8515 return true;
8517 subcode = GET_CODE (XEXP (x, 0));
8518 if (subcode == ASHIFT || subcode == ASHIFTRT
8519 || subcode == LSHIFTRT
8520 || subcode == ROTATE || subcode == ROTATERT)
8522 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8523 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8524 return true;
8527 if (subcode == MULT
8528 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8530 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8531 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8532 return true;
8535 if (subcode == UMIN || subcode == UMAX
8536 || subcode == SMIN || subcode == SMAX)
8538 *total = COSTS_N_INSNS (3);
8539 return true;
8542 return false;
8544 case MULT:
8545 /* This should have been handled by the CPU specific routines. */
8546 gcc_unreachable ();
8548 case TRUNCATE:
8549 if (arm_arch3m && mode == SImode
8550 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8552 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8553 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8554 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8555 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8557 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8558 return true;
8560 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8561 return false;
8563 case NEG:
8564 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8566 if (TARGET_HARD_FLOAT
8567 && (mode == SFmode
8568 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8570 *total = COSTS_N_INSNS (1);
8571 return false;
8573 *total = COSTS_N_INSNS (2);
8574 return false;
8577 /* Fall through */
8578 case NOT:
8579 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8580 if (mode == SImode && code == NOT)
8582 subcode = GET_CODE (XEXP (x, 0));
8583 if (subcode == ASHIFT || subcode == ASHIFTRT
8584 || subcode == LSHIFTRT
8585 || subcode == ROTATE || subcode == ROTATERT
8586 || (subcode == MULT
8587 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8589 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8590 /* Register shifts cost an extra cycle. */
8591 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8592 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8593 subcode, 1, speed);
8594 return true;
8598 return false;
8600 case IF_THEN_ELSE:
8601 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8603 *total = COSTS_N_INSNS (4);
8604 return true;
8607 operand = XEXP (x, 0);
8609 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8610 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8611 && REG_P (XEXP (operand, 0))
8612 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8613 *total += COSTS_N_INSNS (1);
8614 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8615 + rtx_cost (XEXP (x, 2), code, 2, speed));
8616 return true;
8618 case NE:
8619 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8621 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8622 return true;
8624 goto scc_insn;
8626 case GE:
8627 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8628 && mode == SImode && XEXP (x, 1) == const0_rtx)
8630 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8631 return true;
8633 goto scc_insn;
8635 case LT:
8636 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8637 && mode == SImode && XEXP (x, 1) == const0_rtx)
8639 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8640 return true;
8642 goto scc_insn;
8644 case EQ:
8645 case GT:
8646 case LE:
8647 case GEU:
8648 case LTU:
8649 case GTU:
8650 case LEU:
8651 case UNORDERED:
8652 case ORDERED:
8653 case UNEQ:
8654 case UNGE:
8655 case UNLT:
8656 case UNGT:
8657 case UNLE:
8658 scc_insn:
8659 /* SCC insns. In the case where the comparison has already been
8660 performed, then they cost 2 instructions. Otherwise they need
8661 an additional comparison before them. */
8662 *total = COSTS_N_INSNS (2);
8663 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8665 return true;
8668 /* Fall through */
8669 case COMPARE:
8670 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8672 *total = 0;
8673 return true;
8676 *total += COSTS_N_INSNS (1);
8677 if (CONST_INT_P (XEXP (x, 1))
8678 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8680 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8681 return true;
8684 subcode = GET_CODE (XEXP (x, 0));
8685 if (subcode == ASHIFT || subcode == ASHIFTRT
8686 || subcode == LSHIFTRT
8687 || subcode == ROTATE || subcode == ROTATERT)
8689 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8690 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8691 return true;
8694 if (subcode == MULT
8695 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8697 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8698 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8699 return true;
8702 return false;
8704 case UMIN:
8705 case UMAX:
8706 case SMIN:
8707 case SMAX:
8708 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8709 if (!CONST_INT_P (XEXP (x, 1))
8710 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8711 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8712 return true;
8714 case ABS:
8715 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8717 if (TARGET_HARD_FLOAT
8718 && (mode == SFmode
8719 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8721 *total = COSTS_N_INSNS (1);
8722 return false;
8724 *total = COSTS_N_INSNS (20);
8725 return false;
8727 *total = COSTS_N_INSNS (1);
8728 if (mode == DImode)
8729 *total += COSTS_N_INSNS (3);
8730 return false;
8732 case SIGN_EXTEND:
8733 case ZERO_EXTEND:
8734 *total = 0;
8735 if (GET_MODE_CLASS (mode) == MODE_INT)
8737 rtx op = XEXP (x, 0);
8738 machine_mode opmode = GET_MODE (op);
8740 if (mode == DImode)
8741 *total += COSTS_N_INSNS (1);
8743 if (opmode != SImode)
8745 if (MEM_P (op))
8747 /* If !arm_arch4, we use one of the extendhisi2_mem
8748 or movhi_bytes patterns for HImode. For a QImode
8749 sign extension, we first zero-extend from memory
8750 and then perform a shift sequence. */
8751 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8752 *total += COSTS_N_INSNS (2);
8754 else if (arm_arch6)
8755 *total += COSTS_N_INSNS (1);
8757 /* We don't have the necessary insn, so we need to perform some
8758 other operation. */
8759 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8760 /* An and with constant 255. */
8761 *total += COSTS_N_INSNS (1);
8762 else
8763 /* A shift sequence. Increase costs slightly to avoid
8764 combining two shifts into an extend operation. */
8765 *total += COSTS_N_INSNS (2) + 1;
8768 return false;
8771 switch (GET_MODE (XEXP (x, 0)))
8773 case V8QImode:
8774 case V4HImode:
8775 case V2SImode:
8776 case V4QImode:
8777 case V2HImode:
8778 *total = COSTS_N_INSNS (1);
8779 return false;
8781 default:
8782 gcc_unreachable ();
8784 gcc_unreachable ();
8786 case ZERO_EXTRACT:
8787 case SIGN_EXTRACT:
8788 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8789 return true;
8791 case CONST_INT:
8792 if (const_ok_for_arm (INTVAL (x))
8793 || const_ok_for_arm (~INTVAL (x)))
8794 *total = COSTS_N_INSNS (1);
8795 else
8796 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8797 INTVAL (x), NULL_RTX,
8798 NULL_RTX, 0, 0));
8799 return true;
8801 case CONST:
8802 case LABEL_REF:
8803 case SYMBOL_REF:
8804 *total = COSTS_N_INSNS (3);
8805 return true;
8807 case HIGH:
8808 *total = COSTS_N_INSNS (1);
8809 return true;
8811 case LO_SUM:
8812 *total = COSTS_N_INSNS (1);
8813 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8814 return true;
8816 case CONST_DOUBLE:
8817 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8818 && (mode == SFmode || !TARGET_VFP_SINGLE))
8819 *total = COSTS_N_INSNS (1);
8820 else
8821 *total = COSTS_N_INSNS (4);
8822 return true;
8824 case SET:
8825 /* The vec_extract patterns accept memory operands that require an
8826 address reload. Account for the cost of that reload to give the
8827 auto-inc-dec pass an incentive to try to replace them. */
8828 if (TARGET_NEON && MEM_P (SET_DEST (x))
8829 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8831 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8832 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8833 *total += COSTS_N_INSNS (1);
8834 return true;
8836 /* Likewise for the vec_set patterns. */
8837 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8838 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8839 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8841 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8842 *total = rtx_cost (mem, code, 0, speed);
8843 if (!neon_vector_mem_operand (mem, 2, true))
8844 *total += COSTS_N_INSNS (1);
8845 return true;
8847 return false;
8849 case UNSPEC:
8850 /* We cost this as high as our memory costs to allow this to
8851 be hoisted from loops. */
8852 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8854 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8856 return true;
8858 case CONST_VECTOR:
8859 if (TARGET_NEON
8860 && TARGET_HARD_FLOAT
8861 && outer == SET
8862 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8863 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8864 *total = COSTS_N_INSNS (1);
8865 else
8866 *total = COSTS_N_INSNS (4);
8867 return true;
8869 default:
8870 *total = COSTS_N_INSNS (4);
8871 return false;
8875 /* Estimates the size cost of thumb1 instructions.
8876 For now most of the code is copied from thumb1_rtx_costs. We need more
8877 fine-grained tuning when we have more related test cases. */
8878 static inline int
8879 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8881 machine_mode mode = GET_MODE (x);
8882 int words;
8884 switch (code)
8886 case ASHIFT:
8887 case ASHIFTRT:
8888 case LSHIFTRT:
8889 case ROTATERT:
8890 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8892 case PLUS:
8893 case MINUS:
8894 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8895 patterns generated by RTL expansion, especially when expanding a
8896 multiplication. */
8897 if ((GET_CODE (XEXP (x, 0)) == MULT
8898 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8899 || (GET_CODE (XEXP (x, 1)) == MULT
8900 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8901 return COSTS_N_INSNS (2);
8902 /* Deliberately fall through for normal RTXes. */
8903 case COMPARE:
8904 case NEG:
8905 case NOT:
8906 return COSTS_N_INSNS (1);
8908 case MULT:
8909 if (CONST_INT_P (XEXP (x, 1)))
8911 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
8912 into a register first. */
8913 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8914 /* For the targets which have a very small and high-latency multiply
8915 unit, we prefer to synthesize the mult with up to 5 instructions,
8916 giving a good balance between size and performance. */
8917 if (arm_arch6m && arm_m_profile_small_mul)
8918 return COSTS_N_INSNS (5);
8919 else
8920 return COSTS_N_INSNS (1) + const_size;
8922 return COSTS_N_INSNS (1);
8924 case SET:
8925 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8926 the mode. */
8927 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8928 return COSTS_N_INSNS (words)
8929 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8930 || satisfies_constraint_K (SET_SRC (x))
8931 /* thumb1_movdi_insn. */
8932 || ((words > 1) && MEM_P (SET_SRC (x))));
8934 case CONST_INT:
8935 if (outer == SET)
8937 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8938 return COSTS_N_INSNS (1);
8939 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8940 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8941 return COSTS_N_INSNS (2);
8942 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8943 if (thumb_shiftable_const (INTVAL (x)))
8944 return COSTS_N_INSNS (2);
8945 return COSTS_N_INSNS (3);
8947 else if ((outer == PLUS || outer == COMPARE)
8948 && INTVAL (x) < 256 && INTVAL (x) > -256)
8949 return 0;
8950 else if ((outer == IOR || outer == XOR || outer == AND)
8951 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8952 return COSTS_N_INSNS (1);
8953 else if (outer == AND)
8955 int i;
8956 /* This duplicates the tests in the andsi3 expander. */
8957 for (i = 9; i <= 31; i++)
8958 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8959 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8960 return COSTS_N_INSNS (2);
8962 else if (outer == ASHIFT || outer == ASHIFTRT
8963 || outer == LSHIFTRT)
8964 return 0;
8965 return COSTS_N_INSNS (2);
8967 case CONST:
8968 case CONST_DOUBLE:
8969 case LABEL_REF:
8970 case SYMBOL_REF:
8971 return COSTS_N_INSNS (3);
8973 case UDIV:
8974 case UMOD:
8975 case DIV:
8976 case MOD:
8977 return 100;
8979 case TRUNCATE:
8980 return 99;
8982 case AND:
8983 case XOR:
8984 case IOR:
8985 return COSTS_N_INSNS (1);
8987 case MEM:
8988 return (COSTS_N_INSNS (1)
8989 + COSTS_N_INSNS (1)
8990 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8991 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8992 ? COSTS_N_INSNS (1) : 0));
8994 case IF_THEN_ELSE:
8995 /* XXX a guess. */
8996 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8997 return 14;
8998 return 2;
9000 case ZERO_EXTEND:
9001 /* XXX still guessing. */
9002 switch (GET_MODE (XEXP (x, 0)))
9004 case QImode:
9005 return (1 + (mode == DImode ? 4 : 0)
9006 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9008 case HImode:
9009 return (4 + (mode == DImode ? 4 : 0)
9010 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9012 case SImode:
9013 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9015 default:
9016 return 99;
9019 default:
9020 return 99;
9024 /* RTX costs when optimizing for size. */
9025 static bool
9026 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9027 int *total)
9029 machine_mode mode = GET_MODE (x);
9030 if (TARGET_THUMB1)
9032 *total = thumb1_size_rtx_costs (x, code, outer_code);
9033 return true;
9036 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9037 switch (code)
9039 case MEM:
9040 /* A memory access costs 1 insn if the mode is small, or the address is
9041 a single register, otherwise it costs one insn per word. */
9042 if (REG_P (XEXP (x, 0)))
9043 *total = COSTS_N_INSNS (1);
9044 else if (flag_pic
9045 && GET_CODE (XEXP (x, 0)) == PLUS
9046 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9047 /* This will be split into two instructions.
9048 See arm.md:calculate_pic_address. */
9049 *total = COSTS_N_INSNS (2);
9050 else
9051 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9052 return true;
9054 case DIV:
9055 case MOD:
9056 case UDIV:
9057 case UMOD:
9058 /* Needs a libcall, so it costs about this. */
9059 *total = COSTS_N_INSNS (2);
9060 return false;
9062 case ROTATE:
9063 if (mode == SImode && REG_P (XEXP (x, 1)))
9065 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9066 return true;
9068 /* Fall through */
9069 case ROTATERT:
9070 case ASHIFT:
9071 case LSHIFTRT:
9072 case ASHIFTRT:
9073 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9075 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9076 return true;
9078 else if (mode == SImode)
9080 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9081 /* Slightly disparage register shifts, but not by much. */
9082 if (!CONST_INT_P (XEXP (x, 1)))
9083 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9084 return true;
9087 /* Needs a libcall. */
9088 *total = COSTS_N_INSNS (2);
9089 return false;
9091 case MINUS:
9092 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9093 && (mode == SFmode || !TARGET_VFP_SINGLE))
9095 *total = COSTS_N_INSNS (1);
9096 return false;
9099 if (mode == SImode)
9101 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9102 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9104 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9105 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9106 || subcode1 == ROTATE || subcode1 == ROTATERT
9107 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9108 || subcode1 == ASHIFTRT)
9110 /* It's just the cost of the two operands. */
9111 *total = 0;
9112 return false;
9115 *total = COSTS_N_INSNS (1);
9116 return false;
9119 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9120 return false;
9122 case PLUS:
9123 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9124 && (mode == SFmode || !TARGET_VFP_SINGLE))
9126 *total = COSTS_N_INSNS (1);
9127 return false;
9130 /* A shift as a part of ADD costs nothing. */
9131 if (GET_CODE (XEXP (x, 0)) == MULT
9132 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9134 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9135 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9136 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9137 return true;
9140 /* Fall through */
9141 case AND: case XOR: case IOR:
9142 if (mode == SImode)
9144 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9146 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9147 || subcode == LSHIFTRT || subcode == ASHIFTRT
9148 || (code == AND && subcode == NOT))
9150 /* It's just the cost of the two operands. */
9151 *total = 0;
9152 return false;
9156 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9157 return false;
9159 case MULT:
9160 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9161 return false;
9163 case NEG:
9164 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9165 && (mode == SFmode || !TARGET_VFP_SINGLE))
9167 *total = COSTS_N_INSNS (1);
9168 return false;
9171 /* Fall through */
9172 case NOT:
9173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9175 return false;
9177 case IF_THEN_ELSE:
9178 *total = 0;
9179 return false;
9181 case COMPARE:
9182 if (cc_register (XEXP (x, 0), VOIDmode))
9183 *total = 0;
9184 else
9185 *total = COSTS_N_INSNS (1);
9186 return false;
9188 case ABS:
9189 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9190 && (mode == SFmode || !TARGET_VFP_SINGLE))
9191 *total = COSTS_N_INSNS (1);
9192 else
9193 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9194 return false;
9196 case SIGN_EXTEND:
9197 case ZERO_EXTEND:
9198 return arm_rtx_costs_1 (x, outer_code, total, 0);
9200 case CONST_INT:
9201 if (const_ok_for_arm (INTVAL (x)))
9202 /* A multiplication by a constant requires another instruction
9203 to load the constant to a register. */
9204 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9205 ? 1 : 0);
9206 else if (const_ok_for_arm (~INTVAL (x)))
9207 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9208 else if (const_ok_for_arm (-INTVAL (x)))
9210 if (outer_code == COMPARE || outer_code == PLUS
9211 || outer_code == MINUS)
9212 *total = 0;
9213 else
9214 *total = COSTS_N_INSNS (1);
9216 else
9217 *total = COSTS_N_INSNS (2);
9218 return true;
9220 case CONST:
9221 case LABEL_REF:
9222 case SYMBOL_REF:
9223 *total = COSTS_N_INSNS (2);
9224 return true;
9226 case CONST_DOUBLE:
9227 *total = COSTS_N_INSNS (4);
9228 return true;
9230 case CONST_VECTOR:
9231 if (TARGET_NEON
9232 && TARGET_HARD_FLOAT
9233 && outer_code == SET
9234 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9235 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9236 *total = COSTS_N_INSNS (1);
9237 else
9238 *total = COSTS_N_INSNS (4);
9239 return true;
9241 case HIGH:
9242 case LO_SUM:
9243 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9244 cost of these slightly. */
9245 *total = COSTS_N_INSNS (1) + 1;
9246 return true;
9248 case SET:
9249 return false;
9251 default:
9252 if (mode != VOIDmode)
9253 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9254 else
9255 *total = COSTS_N_INSNS (4); /* Who knows? */
9256 return false;
9260 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9261 operand, then return the operand that is being shifted. If the shift
9262 is not by a constant, then set SHIFT_REG to point to the operand.
9263 Return NULL if OP is not a shifter operand. */
9264 static rtx
9265 shifter_op_p (rtx op, rtx *shift_reg)
9267 enum rtx_code code = GET_CODE (op);
9269 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9270 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9271 return XEXP (op, 0);
9272 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9273 return XEXP (op, 0);
9274 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9275 || code == ASHIFTRT)
9277 if (!CONST_INT_P (XEXP (op, 1)))
9278 *shift_reg = XEXP (op, 1);
9279 return XEXP (op, 0);
9282 return NULL;
9285 static bool
9286 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9288 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9289 gcc_assert (GET_CODE (x) == UNSPEC);
9291 switch (XINT (x, 1))
9293 case UNSPEC_UNALIGNED_LOAD:
9294 /* We can only do unaligned loads into the integer unit, and we can't
9295 use LDM or LDRD. */
9296 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9297 if (speed_p)
9298 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9299 + extra_cost->ldst.load_unaligned);
9301 #ifdef NOT_YET
9302 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9303 ADDR_SPACE_GENERIC, speed_p);
9304 #endif
9305 return true;
9307 case UNSPEC_UNALIGNED_STORE:
9308 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9309 if (speed_p)
9310 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9311 + extra_cost->ldst.store_unaligned);
9313 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9314 #ifdef NOT_YET
9315 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9316 ADDR_SPACE_GENERIC, speed_p);
9317 #endif
9318 return true;
9320 case UNSPEC_VRINTZ:
9321 case UNSPEC_VRINTP:
9322 case UNSPEC_VRINTM:
9323 case UNSPEC_VRINTR:
9324 case UNSPEC_VRINTX:
9325 case UNSPEC_VRINTA:
9326 *cost = COSTS_N_INSNS (1);
9327 if (speed_p)
9328 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9330 return true;
9331 default:
9332 *cost = COSTS_N_INSNS (2);
9333 break;
9335 return false;
9338 /* Cost of a libcall. We assume one insn per argument, an amount for the
9339 call (one insn for -Os) and then one for processing the result. */
9340 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
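/* Annotation (not part of the original source): for example a
   two-operand libcall such as a division costs
   LIBCALL_COST (2) = COSTS_N_INSNS (20) when speed_p is true and
   COSTS_N_INSNS (4) when optimizing for size.  */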
9342 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9343 do \
9345 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9346 if (shift_op != NULL \
9347 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9349 if (shift_reg) \
9351 if (speed_p) \
9352 *cost += extra_cost->alu.arith_shift_reg; \
9353 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9355 else if (speed_p) \
9356 *cost += extra_cost->alu.arith_shift; \
9358 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9359 + rtx_cost (XEXP (x, 1 - IDX), \
9360 OP, 1, speed_p)); \
9361 return true; \
9364 while (0);
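/* Annotation (not part of the original source): a sketch of what the
   macro above does for a narrow-mode operation such as
   (plus:HI (ashift:HI r1 (const_int 2)) r2) with IDX = 0, assuming
   arm_rtx_shift_left_p accepts the ashift.  shifter_op_p returns r1 and
   leaves shift_reg NULL because the shift amount is constant, so the
   cost becomes the existing *cost plus extra_cost->alu.arith_shift (when
   speed_p) plus the costs of r1 and r2, modelling a single
   arithmetic-with-shift instruction.  */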
9366 /* RTX costs. Make an estimate of the cost of executing the operation
9367 X, which is contained with an operation with code OUTER_CODE.
9368 SPEED_P indicates whether the cost desired is the performance cost,
9369 or the size cost. The estimate is stored in COST and the return
9370 value is TRUE if the cost calculation is final, or FALSE if the
9371 caller should recurse through the operands of X to add additional
9372 costs.
9374 We currently make no attempt to model the size savings of Thumb-2
9375 16-bit instructions. At the normal points in compilation where
9376 this code is called we have no measure of whether the condition
9377 flags are live or not, and thus no realistic way to determine what
9378 the size will eventually be. */
9379 static bool
9380 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9381 const struct cpu_cost_table *extra_cost,
9382 int *cost, bool speed_p)
9384 machine_mode mode = GET_MODE (x);
9386 if (TARGET_THUMB1)
9388 if (speed_p)
9389 *cost = thumb1_rtx_costs (x, code, outer_code);
9390 else
9391 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9392 return true;
9395 switch (code)
9397 case SET:
9398 *cost = 0;
9399 /* SET RTXs don't have a mode so we get it from the destination. */
9400 mode = GET_MODE (SET_DEST (x));
9402 if (REG_P (SET_SRC (x))
9403 && REG_P (SET_DEST (x)))
9405 /* Assume that most copies can be done with a single insn,
9406 unless we don't have HW FP, in which case everything
9407 larger than word mode will require two insns. */
9408 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9409 && GET_MODE_SIZE (mode) > 4)
9410 || mode == DImode)
9411 ? 2 : 1);
9412 /* Conditional register moves can be encoded
9413 in 16 bits in Thumb mode. */
9414 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9415 *cost >>= 1;
9417 return true;
9420 if (CONST_INT_P (SET_SRC (x)))
9422 /* Handle CONST_INT here, since the value doesn't have a mode
9423 and we would otherwise be unable to work out the true cost. */
9424 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9425 outer_code = SET;
9426 /* Slightly lower the cost of setting a core reg to a constant.
9427 This helps break up chains and allows for better scheduling. */
9428 if (REG_P (SET_DEST (x))
9429 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9430 *cost -= 1;
9431 x = SET_SRC (x);
9432 /* Immediate moves with an immediate in the range [0, 255] can be
9433 encoded in 16 bits in Thumb mode. */
9434 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9435 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9436 *cost >>= 1;
9437 goto const_int_cost;
9440 return false;
9442 case MEM:
9443 /* A memory access costs 1 insn if the mode is small, or the address is
9444 a single register, otherwise it costs one insn per word. */
9445 if (REG_P (XEXP (x, 0)))
9446 *cost = COSTS_N_INSNS (1);
9447 else if (flag_pic
9448 && GET_CODE (XEXP (x, 0)) == PLUS
9449 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9450 /* This will be split into two instructions.
9451 See arm.md:calculate_pic_address. */
9452 *cost = COSTS_N_INSNS (2);
9453 else
9454 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9456 /* For speed optimizations, add the costs of the address and
9457 accessing memory. */
9458 if (speed_p)
9459 #ifdef NOT_YET
9460 *cost += (extra_cost->ldst.load
9461 + arm_address_cost (XEXP (x, 0), mode,
9462 ADDR_SPACE_GENERIC, speed_p));
9463 #else
9464 *cost += extra_cost->ldst.load;
9465 #endif
9466 return true;
9468 case PARALLEL:
9470 /* Calculations of LDM costs are complex. We assume an initial cost
9471 (ldm_1st) which will load the number of registers mentioned in
9472 ldm_regs_per_insn_1st registers; then each additional
9473 ldm_regs_per_insn_subsequent registers cost one more insn. The
9474 formula for N regs is thus:
9476 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9477 + ldm_regs_per_insn_subsequent - 1)
9478 / ldm_regs_per_insn_subsequent).
9480 Additional costs may also be added for addressing. A similar
9481 formula is used for STM. */
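/* Worked example of the formula above (the per-insn register counts here
   are hypothetical, not taken from any particular tuning table): with
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   loading 5 registers adds
     COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2) = COSTS_N_INSNS (2)
   on top of the initial ldm_1st cost.  */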
9483 bool is_ldm = load_multiple_operation (x, SImode);
9484 bool is_stm = store_multiple_operation (x, SImode);
9486 *cost = COSTS_N_INSNS (1);
9488 if (is_ldm || is_stm)
9490 if (speed_p)
9492 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9493 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9494 ? extra_cost->ldst.ldm_regs_per_insn_1st
9495 : extra_cost->ldst.stm_regs_per_insn_1st;
9496 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9497 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9498 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9500 *cost += regs_per_insn_1st
9501 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9502 + regs_per_insn_sub - 1)
9503 / regs_per_insn_sub);
9504 return true;
9508 return false;
9510 case DIV:
9511 case UDIV:
9512 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9513 && (mode == SFmode || !TARGET_VFP_SINGLE))
9514 *cost = COSTS_N_INSNS (speed_p
9515 ? extra_cost->fp[mode != SFmode].div : 1);
9516 else if (mode == SImode && TARGET_IDIV)
9517 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9518 else
9519 *cost = LIBCALL_COST (2);
9520 return false; /* All arguments must be in registers. */
9522 case MOD:
9523 case UMOD:
9524 *cost = LIBCALL_COST (2);
9525 return false; /* All arguments must be in registers. */
9527 case ROTATE:
9528 if (mode == SImode && REG_P (XEXP (x, 1)))
9530 *cost = (COSTS_N_INSNS (2)
9531 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9532 if (speed_p)
9533 *cost += extra_cost->alu.shift_reg;
9534 return true;
9536 /* Fall through */
9537 case ROTATERT:
9538 case ASHIFT:
9539 case LSHIFTRT:
9540 case ASHIFTRT:
9541 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9543 *cost = (COSTS_N_INSNS (3)
9544 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9545 if (speed_p)
9546 *cost += 2 * extra_cost->alu.shift;
9547 return true;
9549 else if (mode == SImode)
9551 *cost = (COSTS_N_INSNS (1)
9552 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9553 /* Slightly disparage register shifts at -Os, but not by much. */
9554 if (!CONST_INT_P (XEXP (x, 1)))
9555 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9556 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9557 return true;
9559 else if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9562 if (code == ASHIFT)
9564 *cost = (COSTS_N_INSNS (1)
9565 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9566 /* Slightly disparage register shifts at -Os, but not by
9567 much. */
9568 if (!CONST_INT_P (XEXP (x, 1)))
9569 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9570 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9572 else if (code == LSHIFTRT || code == ASHIFTRT)
9574 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9576 /* Can use SBFX/UBFX. */
9577 *cost = COSTS_N_INSNS (1);
9578 if (speed_p)
9579 *cost += extra_cost->alu.bfx;
9580 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9582 else
9584 *cost = COSTS_N_INSNS (2);
9585 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9586 if (speed_p)
9588 if (CONST_INT_P (XEXP (x, 1)))
9589 *cost += 2 * extra_cost->alu.shift;
9590 else
9591 *cost += (extra_cost->alu.shift
9592 + extra_cost->alu.shift_reg);
9594 else
9595 /* Slightly disparage register shifts. */
9596 *cost += !CONST_INT_P (XEXP (x, 1));
9599 else /* Rotates. */
9601 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9602 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9603 if (speed_p)
9605 if (CONST_INT_P (XEXP (x, 1)))
9606 *cost += (2 * extra_cost->alu.shift
9607 + extra_cost->alu.log_shift);
9608 else
9609 *cost += (extra_cost->alu.shift
9610 + extra_cost->alu.shift_reg
9611 + extra_cost->alu.log_shift_reg);
9614 return true;
9617 *cost = LIBCALL_COST (2);
9618 return false;
9620 case BSWAP:
9621 if (arm_arch6)
9623 if (mode == SImode)
9625 *cost = COSTS_N_INSNS (1);
9626 if (speed_p)
9627 *cost += extra_cost->alu.rev;
9629 return false;
9632 else
9634 /* No rev instruction available. Look at arm_legacy_rev
9635 and thumb_legacy_rev for the form of RTL used then. */
9636 if (TARGET_THUMB)
9638 *cost = COSTS_N_INSNS (10);
9640 if (speed_p)
9642 *cost += 6 * extra_cost->alu.shift;
9643 *cost += 3 * extra_cost->alu.logical;
9646 else
9648 *cost = COSTS_N_INSNS (5);
9650 if (speed_p)
9652 *cost += 2 * extra_cost->alu.shift;
9653 *cost += extra_cost->alu.arith_shift;
9654 *cost += 2 * extra_cost->alu.logical;
9657 return true;
9659 return false;
9661 case MINUS:
9662 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9663 && (mode == SFmode || !TARGET_VFP_SINGLE))
9665 *cost = COSTS_N_INSNS (1);
9666 if (GET_CODE (XEXP (x, 0)) == MULT
9667 || GET_CODE (XEXP (x, 1)) == MULT)
9669 rtx mul_op0, mul_op1, sub_op;
9671 if (speed_p)
9672 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9674 if (GET_CODE (XEXP (x, 0)) == MULT)
9676 mul_op0 = XEXP (XEXP (x, 0), 0);
9677 mul_op1 = XEXP (XEXP (x, 0), 1);
9678 sub_op = XEXP (x, 1);
9680 else
9682 mul_op0 = XEXP (XEXP (x, 1), 0);
9683 mul_op1 = XEXP (XEXP (x, 1), 1);
9684 sub_op = XEXP (x, 0);
9687 /* The first operand of the multiply may be optionally
9688 negated. */
9689 if (GET_CODE (mul_op0) == NEG)
9690 mul_op0 = XEXP (mul_op0, 0);
9692 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9693 + rtx_cost (mul_op1, code, 0, speed_p)
9694 + rtx_cost (sub_op, code, 0, speed_p));
9696 return true;
9699 if (speed_p)
9700 *cost += extra_cost->fp[mode != SFmode].addsub;
9701 return false;
9704 if (mode == SImode)
9706 rtx shift_by_reg = NULL;
9707 rtx shift_op;
9708 rtx non_shift_op;
9710 *cost = COSTS_N_INSNS (1);
9712 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9713 if (shift_op == NULL)
9715 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9716 non_shift_op = XEXP (x, 0);
9718 else
9719 non_shift_op = XEXP (x, 1);
9721 if (shift_op != NULL)
9723 if (shift_by_reg != NULL)
9725 if (speed_p)
9726 *cost += extra_cost->alu.arith_shift_reg;
9727 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9729 else if (speed_p)
9730 *cost += extra_cost->alu.arith_shift;
9732 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9733 + rtx_cost (non_shift_op, code, 0, speed_p));
9734 return true;
9737 if (arm_arch_thumb2
9738 && GET_CODE (XEXP (x, 1)) == MULT)
9740 /* MLS. */
9741 if (speed_p)
9742 *cost += extra_cost->mult[0].add;
9743 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9744 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9745 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9746 return true;
9749 if (CONST_INT_P (XEXP (x, 0)))
9751 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9752 INTVAL (XEXP (x, 0)), NULL_RTX,
9753 NULL_RTX, 1, 0);
9754 *cost = COSTS_N_INSNS (insns);
9755 if (speed_p)
9756 *cost += insns * extra_cost->alu.arith;
9757 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9758 return true;
9760 else if (speed_p)
9761 *cost += extra_cost->alu.arith;
9763 return false;
9766 if (GET_MODE_CLASS (mode) == MODE_INT
9767 && GET_MODE_SIZE (mode) < 4)
9769 rtx shift_op, shift_reg;
9770 shift_reg = NULL;
9772 /* We check both sides of the MINUS for shifter operands since,
9773 unlike PLUS, it's not commutative. */
9775 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9776 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9778 /* Slightly disparage, as we might need to widen the result. */
9779 *cost = 1 + COSTS_N_INSNS (1);
9780 if (speed_p)
9781 *cost += extra_cost->alu.arith;
9783 if (CONST_INT_P (XEXP (x, 0)))
9785 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9786 return true;
9789 return false;
9792 if (mode == DImode)
9794 *cost = COSTS_N_INSNS (2);
9796 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9798 rtx op1 = XEXP (x, 1);
9800 if (speed_p)
9801 *cost += 2 * extra_cost->alu.arith;
9803 if (GET_CODE (op1) == ZERO_EXTEND)
9804 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9805 else
9806 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9807 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9808 0, speed_p);
9809 return true;
9811 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9813 if (speed_p)
9814 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9815 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9816 0, speed_p)
9817 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9818 return true;
9820 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9821 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9823 if (speed_p)
9824 *cost += (extra_cost->alu.arith
9825 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9826 ? extra_cost->alu.arith
9827 : extra_cost->alu.arith_shift));
9828 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (x, 1), 0),
9830 GET_CODE (XEXP (x, 1)), 0, speed_p));
9831 return true;
9834 if (speed_p)
9835 *cost += 2 * extra_cost->alu.arith;
9836 return false;
9839 /* Vector mode? */
9841 *cost = LIBCALL_COST (2);
9842 return false;
9844 case PLUS:
9845 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9846 && (mode == SFmode || !TARGET_VFP_SINGLE))
9848 *cost = COSTS_N_INSNS (1);
9849 if (GET_CODE (XEXP (x, 0)) == MULT)
9851 rtx mul_op0, mul_op1, add_op;
9853 if (speed_p)
9854 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9856 mul_op0 = XEXP (XEXP (x, 0), 0);
9857 mul_op1 = XEXP (XEXP (x, 0), 1);
9858 add_op = XEXP (x, 1);
9860 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9861 + rtx_cost (mul_op1, code, 0, speed_p)
9862 + rtx_cost (add_op, code, 0, speed_p));
9864 return true;
9867 if (speed_p)
9868 *cost += extra_cost->fp[mode != SFmode].addsub;
9869 return false;
9871 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9873 *cost = LIBCALL_COST (2);
9874 return false;
9877 /* Narrow modes can be synthesized in SImode, but the range
9878 of useful sub-operations is limited. Check for shift operations
9879 on one of the operands. Only left shifts can be used in the
9880 narrow modes. */
9881 if (GET_MODE_CLASS (mode) == MODE_INT
9882 && GET_MODE_SIZE (mode) < 4)
9884 rtx shift_op, shift_reg;
9885 shift_reg = NULL;
9887 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9889 if (CONST_INT_P (XEXP (x, 1)))
9891 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9892 INTVAL (XEXP (x, 1)), NULL_RTX,
9893 NULL_RTX, 1, 0);
9894 *cost = COSTS_N_INSNS (insns);
9895 if (speed_p)
9896 *cost += insns * extra_cost->alu.arith;
9897 /* Slightly penalize a narrow operation as the result may
9898 need widening. */
9899 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9900 return true;
9903 /* Slightly penalize a narrow operation as the result may
9904 need widening. */
9905 *cost = 1 + COSTS_N_INSNS (1);
9906 if (speed_p)
9907 *cost += extra_cost->alu.arith;
9909 return false;
9912 if (mode == SImode)
9914 rtx shift_op, shift_reg;
9916 *cost = COSTS_N_INSNS (1);
9917 if (TARGET_INT_SIMD
9918 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9919 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9921 /* UXTA[BH] or SXTA[BH]. */
9922 if (speed_p)
9923 *cost += extra_cost->alu.extend_arith;
9924 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9925 speed_p)
9926 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9927 return true;
9930 shift_reg = NULL;
9931 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9932 if (shift_op != NULL)
9934 if (shift_reg)
9936 if (speed_p)
9937 *cost += extra_cost->alu.arith_shift_reg;
9938 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9940 else if (speed_p)
9941 *cost += extra_cost->alu.arith_shift;
9943 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9944 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9945 return true;
9947 if (GET_CODE (XEXP (x, 0)) == MULT)
9949 rtx mul_op = XEXP (x, 0);
9951 *cost = COSTS_N_INSNS (1);
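/* The condition below matches multiplies whose operands are each either a
   sign_extend of a halfword or an arithmetic right shift by 16, i.e. the
   bottom or top halfword of a register.  For example (illustrative),
     (plus (mult (sign_extend X) (ashiftrt Y (const_int 16))) ACC)
   corresponds to a single SMLABT instruction.  */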
9953 if (TARGET_DSP_MULTIPLY
9954 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9955 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9956 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9957 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9958 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9959 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9961 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9962 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9963 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9965 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9966 == 16))))))
9968 /* SMLA[BT][BT]. */
9969 if (speed_p)
9970 *cost += extra_cost->mult[0].extend_add;
9971 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9972 SIGN_EXTEND, 0, speed_p)
9973 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9976 return true;
9979 if (speed_p)
9980 *cost += extra_cost->mult[0].add;
9981 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9982 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9983 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9984 return true;
9986 if (CONST_INT_P (XEXP (x, 1)))
9988 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9989 INTVAL (XEXP (x, 1)), NULL_RTX,
9990 NULL_RTX, 1, 0);
9991 *cost = COSTS_N_INSNS (insns);
9992 if (speed_p)
9993 *cost += insns * extra_cost->alu.arith;
9994 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9995 return true;
9997 else if (speed_p)
9998 *cost += extra_cost->alu.arith;
10000 return false;
10003 if (mode == DImode)
10005 if (arm_arch3m
10006 && GET_CODE (XEXP (x, 0)) == MULT
10007 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10008 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10009 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10012 *cost = COSTS_N_INSNS (1);
10013 if (speed_p)
10014 *cost += extra_cost->mult[1].extend_add;
10015 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10016 ZERO_EXTEND, 0, speed_p)
10017 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10020 return true;
10023 *cost = COSTS_N_INSNS (2);
10025 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10026 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10028 if (speed_p)
10029 *cost += (extra_cost->alu.arith
10030 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10031 ? extra_cost->alu.arith
10032 : extra_cost->alu.arith_shift));
10034 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10035 speed_p)
10036 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10037 return true;
10040 if (speed_p)
10041 *cost += 2 * extra_cost->alu.arith;
10042 return false;
10045 /* Vector mode? */
10046 *cost = LIBCALL_COST (2);
10047 return false;
10048 case IOR:
10049 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10051 *cost = COSTS_N_INSNS (1);
10052 if (speed_p)
10053 *cost += extra_cost->alu.rev;
10055 return true;
10057 /* Fall through. */
10058 case AND: case XOR:
10059 if (mode == SImode)
10061 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10062 rtx op0 = XEXP (x, 0);
10063 rtx shift_op, shift_reg;
10065 *cost = COSTS_N_INSNS (1);
10067 if (subcode == NOT
10068 && (code == AND
10069 || (code == IOR && TARGET_THUMB2)))
10070 op0 = XEXP (op0, 0);
10072 shift_reg = NULL;
10073 shift_op = shifter_op_p (op0, &shift_reg);
10074 if (shift_op != NULL)
10076 if (shift_reg)
10078 if (speed_p)
10079 *cost += extra_cost->alu.log_shift_reg;
10080 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10082 else if (speed_p)
10083 *cost += extra_cost->alu.log_shift;
10085 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10087 return true;
10090 if (CONST_INT_P (XEXP (x, 1)))
10092 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10093 INTVAL (XEXP (x, 1)), NULL_RTX,
10094 NULL_RTX, 1, 0);
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.logical;
10099 *cost += rtx_cost (op0, code, 0, speed_p);
10100 return true;
10103 if (speed_p)
10104 *cost += extra_cost->alu.logical;
10105 *cost += (rtx_cost (op0, code, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10107 return true;
10110 if (mode == DImode)
10112 rtx op0 = XEXP (x, 0);
10113 enum rtx_code subcode = GET_CODE (op0);
10115 *cost = COSTS_N_INSNS (2);
10117 if (subcode == NOT
10118 && (code == AND
10119 || (code == IOR && TARGET_THUMB2)))
10120 op0 = XEXP (op0, 0);
10122 if (GET_CODE (op0) == ZERO_EXTEND)
10124 if (speed_p)
10125 *cost += 2 * extra_cost->alu.logical;
10127 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10128 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10129 return true;
10131 else if (GET_CODE (op0) == SIGN_EXTEND)
10133 if (speed_p)
10134 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10136 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10137 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10138 return true;
10141 if (speed_p)
10142 *cost += 2 * extra_cost->alu.logical;
10144 return true;
10146 /* Vector mode? */
10148 *cost = LIBCALL_COST (2);
10149 return false;
10151 case MULT:
10152 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10153 && (mode == SFmode || !TARGET_VFP_SINGLE))
10155 rtx op0 = XEXP (x, 0);
10157 *cost = COSTS_N_INSNS (1);
10159 if (GET_CODE (op0) == NEG)
10160 op0 = XEXP (op0, 0);
10162 if (speed_p)
10163 *cost += extra_cost->fp[mode != SFmode].mult;
10165 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10166 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10167 return true;
10169 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10171 *cost = LIBCALL_COST (2);
10172 return false;
10175 if (mode == SImode)
10177 *cost = COSTS_N_INSNS (1);
10178 if (TARGET_DSP_MULTIPLY
10179 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10180 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10181 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10183 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10184 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10186 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10187 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10188 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10189 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10190 && (INTVAL (XEXP (XEXP (x, 1), 1))
10191 == 16))))))
10193 /* SMUL[TB][TB]. */
10194 if (speed_p)
10195 *cost += extra_cost->mult[0].extend;
10196 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10197 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10198 return true;
10200 if (speed_p)
10201 *cost += extra_cost->mult[0].simple;
10202 return false;
10205 if (mode == DImode)
10207 if (arm_arch3m
10208 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10209 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10210 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10213 *cost = COSTS_N_INSNS (1);
10214 if (speed_p)
10215 *cost += extra_cost->mult[1].extend;
10216 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10217 ZERO_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (x, 1), 0),
10219 ZERO_EXTEND, 0, speed_p));
10220 return true;
10223 *cost = LIBCALL_COST (2);
10224 return false;
10227 /* Vector mode? */
10228 *cost = LIBCALL_COST (2);
10229 return false;
10231 case NEG:
10232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10233 && (mode == SFmode || !TARGET_VFP_SINGLE))
10235 *cost = COSTS_N_INSNS (1);
10236 if (speed_p)
10237 *cost += extra_cost->fp[mode != SFmode].neg;
10239 return false;
10241 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10243 *cost = LIBCALL_COST (1);
10244 return false;
10247 if (mode == SImode)
10249 if (GET_CODE (XEXP (x, 0)) == ABS)
10251 *cost = COSTS_N_INSNS (2);
10252 /* Assume the non-flag-changing variant. */
10253 if (speed_p)
10254 *cost += (extra_cost->alu.log_shift
10255 + extra_cost->alu.arith_shift);
10256 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10257 return true;
10260 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10261 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10263 *cost = COSTS_N_INSNS (2);
10264 /* No extra cost for MOV imm and MVN imm. */
10265 /* If the comparison op is using the flags, there's no further
10266 cost, otherwise we need to add the cost of the comparison. */
10267 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10268 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10269 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10271 *cost += (COSTS_N_INSNS (1)
10272 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10273 speed_p)
10274 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10275 speed_p));
10276 if (speed_p)
10277 *cost += extra_cost->alu.arith;
10279 return true;
10281 *cost = COSTS_N_INSNS (1);
10282 if (speed_p)
10283 *cost += extra_cost->alu.arith;
10284 return false;
10287 if (GET_MODE_CLASS (mode) == MODE_INT
10288 && GET_MODE_SIZE (mode) < 4)
10290 /* Slightly disparage, as we might need an extend operation. */
10291 *cost = 1 + COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10297 if (mode == DImode)
10299 *cost = COSTS_N_INSNS (2);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.arith;
10302 return false;
10305 /* Vector mode? */
10306 *cost = LIBCALL_COST (1);
10307 return false;
10309 case NOT:
10310 if (mode == SImode)
10312 rtx shift_op;
10313 rtx shift_reg = NULL;
10315 *cost = COSTS_N_INSNS (1);
10316 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10318 if (shift_op)
10320 if (shift_reg != NULL)
10322 if (speed_p)
10323 *cost += extra_cost->alu.log_shift_reg;
10324 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10326 else if (speed_p)
10327 *cost += extra_cost->alu.log_shift;
10328 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10329 return true;
10332 if (speed_p)
10333 *cost += extra_cost->alu.logical;
10334 return false;
10336 if (mode == DImode)
10338 *cost = COSTS_N_INSNS (2);
10339 return false;
10342 /* Vector mode? */
10344 *cost += LIBCALL_COST (1);
10345 return false;
10347 case IF_THEN_ELSE:
10349 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10351 *cost = COSTS_N_INSNS (4);
10352 return true;
10354 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10355 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10357 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10358 /* Assume that if one arm of the if_then_else is a register,
10359 that it will be tied with the result and eliminate the
10360 conditional insn. */
10361 if (REG_P (XEXP (x, 1)))
10362 *cost += op2cost;
10363 else if (REG_P (XEXP (x, 2)))
10364 *cost += op1cost;
10365 else
10367 if (speed_p)
10369 if (extra_cost->alu.non_exec_costs_exec)
10370 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10371 else
10372 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10374 else
10375 *cost += op1cost + op2cost;
10378 return true;
10380 case COMPARE:
10381 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10382 *cost = 0;
10383 else
10385 machine_mode op0mode;
10386 /* We'll mostly assume that the cost of a compare is the cost of the
10387 LHS. However, there are some notable exceptions. */
10389 /* Floating point compares are never done as side-effects. */
10390 op0mode = GET_MODE (XEXP (x, 0));
10391 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10392 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10394 *cost = COSTS_N_INSNS (1);
10395 if (speed_p)
10396 *cost += extra_cost->fp[op0mode != SFmode].compare;
10398 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10400 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10401 return true;
10404 return false;
10406 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10408 *cost = LIBCALL_COST (2);
10409 return false;
10412 /* DImode compares normally take two insns. */
10413 if (op0mode == DImode)
10415 *cost = COSTS_N_INSNS (2);
10416 if (speed_p)
10417 *cost += 2 * extra_cost->alu.arith;
10418 return false;
10421 if (op0mode == SImode)
10423 rtx shift_op;
10424 rtx shift_reg;
10426 if (XEXP (x, 1) == const0_rtx
10427 && !(REG_P (XEXP (x, 0))
10428 || (GET_CODE (XEXP (x, 0)) == SUBREG
10429 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10431 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10433 /* Multiply operations that set the flags are often
10434 significantly more expensive. */
10435 if (speed_p
10436 && GET_CODE (XEXP (x, 0)) == MULT
10437 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10438 *cost += extra_cost->mult[0].flag_setting;
10440 if (speed_p
10441 && GET_CODE (XEXP (x, 0)) == PLUS
10442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10443 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10444 0), 1), mode))
10445 *cost += extra_cost->mult[0].flag_setting;
10446 return true;
10449 shift_reg = NULL;
10450 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10451 if (shift_op != NULL)
10453 *cost = COSTS_N_INSNS (1);
10454 if (shift_reg != NULL)
10456 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10457 if (speed_p)
10458 *cost += extra_cost->alu.arith_shift_reg;
10460 else if (speed_p)
10461 *cost += extra_cost->alu.arith_shift;
10462 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10463 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10464 return true;
10467 *cost = COSTS_N_INSNS (1);
10468 if (speed_p)
10469 *cost += extra_cost->alu.arith;
10470 if (CONST_INT_P (XEXP (x, 1))
10471 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10473 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10474 return true;
10476 return false;
10479 /* Vector mode? */
10481 *cost = LIBCALL_COST (2);
10482 return false;
10484 return true;
10486 case EQ:
10487 case NE:
10488 case LT:
10489 case LE:
10490 case GT:
10491 case GE:
10492 case LTU:
10493 case LEU:
10494 case GEU:
10495 case GTU:
10496 case ORDERED:
10497 case UNORDERED:
10498 case UNEQ:
10499 case UNLE:
10500 case UNLT:
10501 case UNGE:
10502 case UNGT:
10503 case LTGT:
10504 if (outer_code == SET)
10506 /* Is it a store-flag operation? */
10507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10508 && XEXP (x, 1) == const0_rtx)
10510 /* Thumb also needs an IT insn. */
10511 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10512 return true;
10514 if (XEXP (x, 1) == const0_rtx)
10516 switch (code)
10518 case LT:
10519 /* LSR Rd, Rn, #31. */
10520 *cost = COSTS_N_INSNS (1);
10521 if (speed_p)
10522 *cost += extra_cost->alu.shift;
10523 break;
10525 case EQ:
10526 /* RSBS T1, Rn, #0
10527 ADC Rd, Rn, T1. */
10529 case NE:
10530 /* SUBS T1, Rn, #1
10531 SBC Rd, Rn, T1. */
10532 *cost = COSTS_N_INSNS (2);
10533 break;
10535 case LE:
10536 /* RSBS T1, Rn, Rn, LSR #31
10537 ADC Rd, Rn, T1. */
10538 *cost = COSTS_N_INSNS (2);
10539 if (speed_p)
10540 *cost += extra_cost->alu.arith_shift;
10541 break;
10543 case GT:
10544 /* RSB Rd, Rn, Rn, ASR #1
10545 LSR Rd, Rd, #31. */
10546 *cost = COSTS_N_INSNS (2);
10547 if (speed_p)
10548 *cost += (extra_cost->alu.arith_shift
10549 + extra_cost->alu.shift);
10550 break;
10552 case GE:
10553 /* ASR Rd, Rn, #31
10554 ADD Rd, Rn, #1. */
10555 *cost = COSTS_N_INSNS (2);
10556 if (speed_p)
10557 *cost += extra_cost->alu.shift;
10558 break;
10560 default:
10561 /* Remaining cases are either meaningless or would take
10562 three insns anyway. */
10563 *cost = COSTS_N_INSNS (3);
10564 break;
10566 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10567 return true;
10569 else
10571 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10572 if (CONST_INT_P (XEXP (x, 1))
10573 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10575 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10576 return true;
10579 return false;
10582 /* Not directly inside a set. If it involves the condition code
10583 register it must be the condition for a branch, cond_exec or
10584 I_T_E operation. Since the comparison is performed elsewhere
10585 this is just the control part which has no additional
10586 cost. */
10587 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10588 && XEXP (x, 1) == const0_rtx)
10590 *cost = 0;
10591 return true;
10593 return false;
10595 case ABS:
10596 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10597 && (mode == SFmode || !TARGET_VFP_SINGLE))
10599 *cost = COSTS_N_INSNS (1);
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode != SFmode].neg;
10603 return false;
10605 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10607 *cost = LIBCALL_COST (1);
10608 return false;
10611 if (mode == SImode)
10613 *cost = COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10616 return false;
10618 /* Vector mode? */
10619 *cost = LIBCALL_COST (1);
10620 return false;
10622 case SIGN_EXTEND:
10623 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10624 && MEM_P (XEXP (x, 0)))
10626 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10628 if (mode == DImode)
10629 *cost += COSTS_N_INSNS (1);
10631 if (!speed_p)
10632 return true;
10634 if (GET_MODE (XEXP (x, 0)) == SImode)
10635 *cost += extra_cost->ldst.load;
10636 else
10637 *cost += extra_cost->ldst.load_sign_extend;
10639 if (mode == DImode)
10640 *cost += extra_cost->alu.shift;
10642 return true;
10645 /* Widening from less than 32-bits requires an extend operation. */
10646 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10648 /* We have SXTB/SXTH. */
10649 *cost = COSTS_N_INSNS (1);
10650 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10651 if (speed_p)
10652 *cost += extra_cost->alu.extend;
10654 else if (GET_MODE (XEXP (x, 0)) != SImode)
10656 /* Needs two shifts. */
10657 *cost = COSTS_N_INSNS (2);
10658 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10659 if (speed_p)
10660 *cost += 2 * extra_cost->alu.shift;
10663 /* Widening beyond 32-bits requires one more insn. */
10664 if (mode == DImode)
10666 *cost += COSTS_N_INSNS (1);
10667 if (speed_p)
10668 *cost += extra_cost->alu.shift;
10671 return true;
10673 case ZERO_EXTEND:
10674 if ((arm_arch4
10675 || GET_MODE (XEXP (x, 0)) == SImode
10676 || GET_MODE (XEXP (x, 0)) == QImode)
10677 && MEM_P (XEXP (x, 0)))
10679 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10681 if (mode == DImode)
10682 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10684 return true;
10687 /* Widening from less than 32-bits requires an extend operation. */
10688 if (GET_MODE (XEXP (x, 0)) == QImode)
10690 /* UXTB can be a shorter instruction in Thumb2, but it might
10691 be slower than the AND Rd, Rn, #255 alternative. When
10692 optimizing for speed it should never be slower to use
10693 AND, and we don't really model 16-bit vs 32-bit insns
10694 here. */
10695 *cost = COSTS_N_INSNS (1);
10696 if (speed_p)
10697 *cost += extra_cost->alu.logical;
10699 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10701 /* We have UXTB/UXTH. */
10702 *cost = COSTS_N_INSNS (1);
10703 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10704 if (speed_p)
10705 *cost += extra_cost->alu.extend;
10707 else if (GET_MODE (XEXP (x, 0)) != SImode)
10709 /* Needs two shifts. It's marginally preferable to use
10710 shifts rather than two BIC instructions as the second
10711 shift may merge with a subsequent insn as a shifter
10712 op. */
10713 *cost = COSTS_N_INSNS (2);
10714 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10715 if (speed_p)
10716 *cost += 2 * extra_cost->alu.shift;
10718 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10719 *cost = COSTS_N_INSNS (1);
10721 /* Widening beyond 32-bits requires one more insn. */
10722 if (mode == DImode)
10724 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10727 return true;
10729 case CONST_INT:
10730 *cost = 0;
10731 /* CONST_INT has no mode, so we cannot tell for sure how many
10732 insns are really going to be needed. The best we can do is
10733 look at the value passed. If it fits in SImode, then assume
10734 that's the mode it will be used for. Otherwise assume it
10735 will be used in DImode. */
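/* For example (illustrative): 0x7fffffff is unchanged by
   trunc_int_for_mode (..., SImode) and so is costed as an SImode constant,
   whereas (HOST_WIDE_INT) 0x1ffffffff is not and is costed as a DImode
   constant below.  */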
10736 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10737 mode = SImode;
10738 else
10739 mode = DImode;
10741 /* Avoid blowing up in arm_gen_constant (). */
10742 if (!(outer_code == PLUS
10743 || outer_code == AND
10744 || outer_code == IOR
10745 || outer_code == XOR
10746 || outer_code == MINUS))
10747 outer_code = SET;
10749 const_int_cost:
10750 if (mode == SImode)
10752 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10753 INTVAL (x), NULL, NULL,
10754 0, 0));
10755 /* Extra costs? */
10757 else
10759 *cost += COSTS_N_INSNS (arm_gen_constant
10760 (outer_code, SImode, NULL,
10761 trunc_int_for_mode (INTVAL (x), SImode),
10762 NULL, NULL, 0, 0)
10763 + arm_gen_constant (outer_code, SImode, NULL,
10764 INTVAL (x) >> 32, NULL,
10765 NULL, 0, 0));
10766 /* Extra costs? */
10769 return true;
10771 case CONST:
10772 case LABEL_REF:
10773 case SYMBOL_REF:
10774 if (speed_p)
10776 if (arm_arch_thumb2 && !flag_pic)
10777 *cost = COSTS_N_INSNS (2);
10778 else
10779 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10781 else
10782 *cost = COSTS_N_INSNS (2);
10784 if (flag_pic)
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->alu.arith;
10791 return true;
10793 case CONST_FIXED:
10794 *cost = COSTS_N_INSNS (4);
10795 /* Fixme. */
10796 return true;
10798 case CONST_DOUBLE:
10799 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10800 && (mode == SFmode || !TARGET_VFP_SINGLE))
10802 if (vfp3_const_double_rtx (x))
10804 *cost = COSTS_N_INSNS (1);
10805 if (speed_p)
10806 *cost += extra_cost->fp[mode == DFmode].fpconst;
10807 return true;
10810 if (speed_p)
10812 *cost = COSTS_N_INSNS (1);
10813 if (mode == DFmode)
10814 *cost += extra_cost->ldst.loadd;
10815 else
10816 *cost += extra_cost->ldst.loadf;
10818 else
10819 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10821 return true;
10823 *cost = COSTS_N_INSNS (4);
10824 return true;
10826 case CONST_VECTOR:
10827 /* Fixme. */
10828 if (TARGET_NEON
10829 && TARGET_HARD_FLOAT
10830 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10831 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10832 *cost = COSTS_N_INSNS (1);
10833 else
10834 *cost = COSTS_N_INSNS (4);
10835 return true;
10837 case HIGH:
10838 case LO_SUM:
10839 *cost = COSTS_N_INSNS (1);
10840 /* When optimizing for size, we prefer constant pool entries to
10841 MOVW/MOVT pairs, so bump the cost of these slightly. */
10842 if (!speed_p)
10843 *cost += 1;
10844 return true;
10846 case CLZ:
10847 *cost = COSTS_N_INSNS (1);
10848 if (speed_p)
10849 *cost += extra_cost->alu.clz;
10850 return false;
10852 case SMIN:
10853 if (XEXP (x, 1) == const0_rtx)
10855 *cost = COSTS_N_INSNS (1);
10856 if (speed_p)
10857 *cost += extra_cost->alu.log_shift;
10858 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10859 return true;
10861 /* Fall through. */
10862 case SMAX:
10863 case UMIN:
10864 case UMAX:
10865 *cost = COSTS_N_INSNS (2);
10866 return false;
10868 case TRUNCATE:
10869 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10870 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10871 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10872 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10873 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10874 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10875 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10876 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10877 == ZERO_EXTEND))))
10879 *cost = COSTS_N_INSNS (1);
10880 if (speed_p)
10881 *cost += extra_cost->mult[1].extend;
10882 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10883 speed_p)
10884 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10885 0, speed_p));
10886 return true;
10888 *cost = LIBCALL_COST (1);
10889 return false;
10891 case UNSPEC:
10892 return arm_unspec_cost (x, outer_code, speed_p, cost);
10894 case PC:
10895 /* Reading the PC is like reading any other register. Writing it
10896 is more expensive, but we take that into account elsewhere. */
10897 *cost = 0;
10898 return true;
10900 case ZERO_EXTRACT:
10901 /* TODO: Simple zero_extract of bottom bits using AND. */
10902 /* Fall through. */
10903 case SIGN_EXTRACT:
10904 if (arm_arch6
10905 && mode == SImode
10906 && CONST_INT_P (XEXP (x, 1))
10907 && CONST_INT_P (XEXP (x, 2)))
10909 *cost = COSTS_N_INSNS (1);
10910 if (speed_p)
10911 *cost += extra_cost->alu.bfx;
10912 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10913 return true;
10915 /* Without UBFX/SBFX, need to resort to shift operations. */
10916 *cost = COSTS_N_INSNS (2);
10917 if (speed_p)
10918 *cost += 2 * extra_cost->alu.shift;
10919 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10920 return true;
10922 case FLOAT_EXTEND:
10923 if (TARGET_HARD_FLOAT)
10925 *cost = COSTS_N_INSNS (1);
10926 if (speed_p)
10927 *cost += extra_cost->fp[mode == DFmode].widen;
10928 if (!TARGET_FPU_ARMV8
10929 && GET_MODE (XEXP (x, 0)) == HFmode)
10931 /* Pre v8, widening HF->DF is a two-step process, first
10932 widening to SFmode. */
10933 *cost += COSTS_N_INSNS (1);
10934 if (speed_p)
10935 *cost += extra_cost->fp[0].widen;
10937 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10938 return true;
10941 *cost = LIBCALL_COST (1);
10942 return false;
10944 case FLOAT_TRUNCATE:
10945 if (TARGET_HARD_FLOAT)
10947 *cost = COSTS_N_INSNS (1);
10948 if (speed_p)
10949 *cost += extra_cost->fp[mode == DFmode].narrow;
10950 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10951 return true;
10952 /* Vector modes? */
10954 *cost = LIBCALL_COST (1);
10955 return false;
10957 case FMA:
10958 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10960 rtx op0 = XEXP (x, 0);
10961 rtx op1 = XEXP (x, 1);
10962 rtx op2 = XEXP (x, 2);
10964 *cost = COSTS_N_INSNS (1);
10966 /* vfms or vfnma. */
10967 if (GET_CODE (op0) == NEG)
10968 op0 = XEXP (op0, 0);
10970 /* vfnms or vfnma. */
10971 if (GET_CODE (op2) == NEG)
10972 op2 = XEXP (op2, 0);
10974 *cost += rtx_cost (op0, FMA, 0, speed_p);
10975 *cost += rtx_cost (op1, FMA, 1, speed_p);
10976 *cost += rtx_cost (op2, FMA, 2, speed_p);
10978 if (speed_p)
10979 *cost += extra_cost->fp[mode == DFmode].fma;
10981 return true;
10984 *cost = LIBCALL_COST (3);
10985 return false;
10987 case FIX:
10988 case UNSIGNED_FIX:
10989 if (TARGET_HARD_FLOAT)
10991 if (GET_MODE_CLASS (mode) == MODE_INT)
10993 *cost = COSTS_N_INSNS (1);
10994 if (speed_p)
10995 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10996 /* Strip off the 'cost' of rounding towards zero. */
10997 if (GET_CODE (XEXP (x, 0)) == FIX)
10998 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10999 else
11000 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11001 /* ??? Increase the cost to deal with transferring from
11002 FP -> CORE registers? */
11003 return true;
11005 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11006 && TARGET_FPU_ARMV8)
11008 *cost = COSTS_N_INSNS (1);
11009 if (speed_p)
11010 *cost += extra_cost->fp[mode == DFmode].roundint;
11011 return false;
11013 /* Vector costs? */
11015 *cost = LIBCALL_COST (1);
11016 return false;
11018 case FLOAT:
11019 case UNSIGNED_FLOAT:
11020 if (TARGET_HARD_FLOAT)
11022 /* ??? Increase the cost to deal with transferring from CORE
11023 -> FP registers? */
11024 *cost = COSTS_N_INSNS (1);
11025 if (speed_p)
11026 *cost += extra_cost->fp[mode == DFmode].fromint;
11027 return false;
11029 *cost = LIBCALL_COST (1);
11030 return false;
11032 case CALL:
11033 *cost = COSTS_N_INSNS (1);
11034 return true;
11036 case ASM_OPERANDS:
11038 /* Just a guess: the number of instructions in the asm template
11039 plus one insn per input. Always a minimum of COSTS_N_INSNS (1),
11040 though (see PR60663). */
11041 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11042 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11044 *cost = COSTS_N_INSNS (asm_length + num_operands);
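/* E.g. (illustrative) an asm whose template contains three instructions
   and which has two input operands is costed as COSTS_N_INSNS (5).  */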
11045 return true;
11047 default:
11048 if (mode != VOIDmode)
11049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11050 else
11051 *cost = COSTS_N_INSNS (4); /* Who knows? */
11052 return false;
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11058 /* RTX costs. Dispatch to the per-core or generic cost tables, for both speed and size. */
11059 static bool
11060 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11061 int *total, bool speed)
11063 bool result;
11065 if (TARGET_OLD_RTX_COSTS
11066 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11068 /* Old way. (Deprecated.) */
11069 if (!speed)
11070 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11071 (enum rtx_code) outer_code, total);
11072 else
11073 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11074 (enum rtx_code) outer_code, total,
11075 speed);
11077 else
11079 /* New way. */
11080 if (current_tune->insn_extra_cost)
11081 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11082 (enum rtx_code) outer_code,
11083 current_tune->insn_extra_cost,
11084 total, speed);
11085 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11086 && current_tune->insn_extra_cost == NULL. */
11087 else
11088 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11089 (enum rtx_code) outer_code,
11090 &generic_extra_costs, total, speed);
11093 if (dump_file && (dump_flags & TDF_DETAILS))
11095 print_rtl_single (dump_file, x);
11096 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11097 *total, result ? "final" : "partial");
11099 return result;
11102 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11103 supported on any "slowmul" cores, so it can be ignored. */
11105 static bool
11106 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11107 int *total, bool speed)
11109 machine_mode mode = GET_MODE (x);
11111 if (TARGET_THUMB)
11113 *total = thumb1_rtx_costs (x, code, outer_code);
11114 return true;
11117 switch (code)
11119 case MULT:
11120 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11121 || mode == DImode)
11123 *total = COSTS_N_INSNS (20);
11124 return false;
11127 if (CONST_INT_P (XEXP (x, 1)))
11129 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11130 & (unsigned HOST_WIDE_INT) 0xffffffff);
11131 int cost, const_ok = const_ok_for_arm (i);
11132 int j, booth_unit_size;
11134 /* Tune as appropriate. */
11135 cost = const_ok ? 4 : 8;
11136 booth_unit_size = 2;
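/* The loop below adds one to the cost for each booth_unit_size-bit chunk
   of the constant that still contains a nonzero bit.  Worked example
   (illustrative): for i == 0x5 the loop body runs twice (i becomes 0x1,
   then 0), so 2 is added to the base cost.  */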
11137 for (j = 0; i && j < 32; j += booth_unit_size)
11139 i >>= booth_unit_size;
11140 cost++;
11143 *total = COSTS_N_INSNS (cost);
11144 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11145 return true;
11148 *total = COSTS_N_INSNS (20);
11149 return false;
11151 default:
11152 return arm_rtx_costs_1 (x, outer_code, total, speed);
11157 /* RTX cost for cores with a fast multiply unit (M variants). */
11159 static bool
11160 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11161 int *total, bool speed)
11163 machine_mode mode = GET_MODE (x);
11165 if (TARGET_THUMB1)
11167 *total = thumb1_rtx_costs (x, code, outer_code);
11168 return true;
11171 /* ??? should thumb2 use different costs? */
11172 switch (code)
11174 case MULT:
11175 /* There is no point basing this on the tuning, since it is always the
11176 fast variant if it exists at all. */
11177 if (mode == DImode
11178 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11179 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11180 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11182 *total = COSTS_N_INSNS (2);
11183 return false;
11187 if (mode == DImode)
11189 *total = COSTS_N_INSNS (5);
11190 return false;
11193 if (CONST_INT_P (XEXP (x, 1)))
11195 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11196 & (unsigned HOST_WIDE_INT) 0xffffffff);
11197 int cost, const_ok = const_ok_for_arm (i);
11198 int j, booth_unit_size;
11200 /* Tune as appropriate. */
11201 cost = const_ok ? 4 : 8;
11202 booth_unit_size = 8;
11203 for (j = 0; i && j < 32; j += booth_unit_size)
11205 i >>= booth_unit_size;
11206 cost++;
11209 *total = COSTS_N_INSNS (cost);
11210 return false;
11213 if (mode == SImode)
11215 *total = COSTS_N_INSNS (4);
11216 return false;
11219 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11221 if (TARGET_HARD_FLOAT
11222 && (mode == SFmode
11223 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11225 *total = COSTS_N_INSNS (1);
11226 return false;
11230 /* Requires a lib call */
11231 *total = COSTS_N_INSNS (20);
11232 return false;
11234 default:
11235 return arm_rtx_costs_1 (x, outer_code, total, speed);
11240 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11241 so it can be ignored. */
11243 static bool
11244 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11245 int *total, bool speed)
11247 machine_mode mode = GET_MODE (x);
11249 if (TARGET_THUMB)
11251 *total = thumb1_rtx_costs (x, code, outer_code);
11252 return true;
11255 switch (code)
11257 case COMPARE:
11258 if (GET_CODE (XEXP (x, 0)) != MULT)
11259 return arm_rtx_costs_1 (x, outer_code, total, speed);
11261 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11262 will stall until the multiplication is complete. */
11263 *total = COSTS_N_INSNS (3);
11264 return false;
11266 case MULT:
11267 /* There is no point basing this on the tuning, since it is always the
11268 fast variant if it exists at all. */
11269 if (mode == DImode
11270 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11271 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11272 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11274 *total = COSTS_N_INSNS (2);
11275 return false;
11279 if (mode == DImode)
11281 *total = COSTS_N_INSNS (5);
11282 return false;
11285 if (CONST_INT_P (XEXP (x, 1)))
11287 /* If operand 1 is a constant we can more accurately
11288 calculate the cost of the multiply. The multiplier can
11289 retire 15 bits on the first cycle and a further 12 on the
11290 second. We do, of course, have to load the constant into
11291 a register first. */
11292 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11293 /* There's a general overhead of one cycle. */
11294 int cost = 1;
11295 unsigned HOST_WIDE_INT masked_const;
11297 if (i & 0x80000000)
11298 i = ~i;
11300 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11302 masked_const = i & 0xffff8000;
11303 if (masked_const != 0)
11305 cost++;
11306 masked_const = i & 0xf8000000;
11307 if (masked_const != 0)
11308 cost++;
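/* Worked example (illustrative): for i == 0x12345 the first mask
   (0xffff8000) leaves a nonzero value, adding one cycle, while the second
   mask (0xf8000000) leaves zero, so including the general one-cycle
   overhead *total becomes COSTS_N_INSNS (2).  */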
11310 *total = COSTS_N_INSNS (cost);
11311 return false;
11314 if (mode == SImode)
11316 *total = COSTS_N_INSNS (3);
11317 return false;
11320 /* Requires a lib call */
11321 *total = COSTS_N_INSNS (20);
11322 return false;
11324 default:
11325 return arm_rtx_costs_1 (x, outer_code, total, speed);
11330 /* RTX costs for 9e (and later) cores. */
11332 static bool
11333 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11334 int *total, bool speed)
11336 machine_mode mode = GET_MODE (x);
11338 if (TARGET_THUMB1)
11340 switch (code)
11342 case MULT:
11343 /* Small multiply: 32 cycles for an integer multiply inst. */
11344 if (arm_arch6m && arm_m_profile_small_mul)
11345 *total = COSTS_N_INSNS (32);
11346 else
11347 *total = COSTS_N_INSNS (3);
11348 return true;
11350 default:
11351 *total = thumb1_rtx_costs (x, code, outer_code);
11352 return true;
11356 switch (code)
11358 case MULT:
11359 /* There is no point basing this on the tuning, since it is always the
11360 fast variant if it exists at all. */
11361 if (mode == DImode
11362 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11363 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11364 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11366 *total = COSTS_N_INSNS (2);
11367 return false;
11371 if (mode == DImode)
11373 *total = COSTS_N_INSNS (5);
11374 return false;
11377 if (mode == SImode)
11379 *total = COSTS_N_INSNS (2);
11380 return false;
11383 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11385 if (TARGET_HARD_FLOAT
11386 && (mode == SFmode
11387 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11389 *total = COSTS_N_INSNS (1);
11390 return false;
11394 *total = COSTS_N_INSNS (20);
11395 return false;
11397 default:
11398 return arm_rtx_costs_1 (x, outer_code, total, speed);
11401 /* All address computations that can be done are free, but rtx cost returns
11402 the same for practically all of them. So we weight the different types
11403 of address here in the order (most pref first):
11404 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11405 static inline int
11406 arm_arm_address_cost (rtx x)
11408 enum rtx_code c = GET_CODE (x);
11410 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11411 return 0;
11412 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11413 return 10;
11415 if (c == PLUS)
11417 if (CONST_INT_P (XEXP (x, 1)))
11418 return 2;
11420 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11421 return 3;
11423 return 4;
11426 return 6;
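/* Illustrative examples of the weighting above: (post_inc r0) -> 0,
   (plus r0 (const_int 4)) -> 2, (plus r0 (mult r1 (const_int 4))) -> 3,
   (plus r0 r1) -> 4, a plain register -> 6, (symbol_ref foo) -> 10.  */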
11429 static inline int
11430 arm_thumb_address_cost (rtx x)
11432 enum rtx_code c = GET_CODE (x);
11434 if (c == REG)
11435 return 1;
11436 if (c == PLUS
11437 && REG_P (XEXP (x, 0))
11438 && CONST_INT_P (XEXP (x, 1)))
11439 return 1;
11441 return 2;
11444 static int
11445 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11446 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11448 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11451 /* Adjust cost hook for XScale. */
11452 static bool
11453 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11455 /* Some true dependencies can have a higher cost depending
11456 on precisely how certain input operands are used. */
11457 if (REG_NOTE_KIND (link) == 0
11458 && recog_memoized (insn) >= 0
11459 && recog_memoized (dep) >= 0)
11461 int shift_opnum = get_attr_shift (insn);
11462 enum attr_type attr_type = get_attr_type (dep);
11464 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11465 operand for INSN. If we have a shifted input operand and the
11466 instruction we depend on is another ALU instruction, then we may
11467 have to account for an additional stall. */
11468 if (shift_opnum != 0
11469 && (attr_type == TYPE_ALU_SHIFT_IMM
11470 || attr_type == TYPE_ALUS_SHIFT_IMM
11471 || attr_type == TYPE_LOGIC_SHIFT_IMM
11472 || attr_type == TYPE_LOGICS_SHIFT_IMM
11473 || attr_type == TYPE_ALU_SHIFT_REG
11474 || attr_type == TYPE_ALUS_SHIFT_REG
11475 || attr_type == TYPE_LOGIC_SHIFT_REG
11476 || attr_type == TYPE_LOGICS_SHIFT_REG
11477 || attr_type == TYPE_MOV_SHIFT
11478 || attr_type == TYPE_MVN_SHIFT
11479 || attr_type == TYPE_MOV_SHIFT_REG
11480 || attr_type == TYPE_MVN_SHIFT_REG))
11482 rtx shifted_operand;
11483 int opno;
11485 /* Get the shifted operand. */
11486 extract_insn (insn);
11487 shifted_operand = recog_data.operand[shift_opnum];
11489 /* Iterate over all the operands in DEP. If we write an operand
11490 that overlaps with SHIFTED_OPERAND, then we have to increase the
11491 cost of this dependency. */
11492 extract_insn (dep);
11493 preprocess_constraints (dep);
11494 for (opno = 0; opno < recog_data.n_operands; opno++)
11496 /* We can ignore strict inputs. */
11497 if (recog_data.operand_type[opno] == OP_IN)
11498 continue;
11500 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11501 shifted_operand))
11503 *cost = 2;
11504 return false;
11509 return true;
11512 /* Adjust cost hook for Cortex A9. */
11513 static bool
11514 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11516 switch (REG_NOTE_KIND (link))
11518 case REG_DEP_ANTI:
11519 *cost = 0;
11520 return false;
11522 case REG_DEP_TRUE:
11523 case REG_DEP_OUTPUT:
11524 if (recog_memoized (insn) >= 0
11525 && recog_memoized (dep) >= 0)
11527 if (GET_CODE (PATTERN (insn)) == SET)
11529 if (GET_MODE_CLASS
11530 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11531 || GET_MODE_CLASS
11532 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11534 enum attr_type attr_type_insn = get_attr_type (insn);
11535 enum attr_type attr_type_dep = get_attr_type (dep);
11537 /* By default all dependencies of the form
11538 s0 = s0 <op> s1
11539 s0 = s0 <op> s2
11540 have an extra latency of 1 cycle because
11541 of the input and output dependency in this
11542 case. However this gets modeled as a true
11543 dependency and hence all these checks. */
11544 if (REG_P (SET_DEST (PATTERN (insn)))
11545 && REG_P (SET_DEST (PATTERN (dep)))
11546 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11547 SET_DEST (PATTERN (dep))))
11549 /* FMACS is a special case where the dependent
11550 instruction can be issued 3 cycles before
11551 the normal latency in case of an output
11552 dependency. */
11553 if ((attr_type_insn == TYPE_FMACS
11554 || attr_type_insn == TYPE_FMACD)
11555 && (attr_type_dep == TYPE_FMACS
11556 || attr_type_dep == TYPE_FMACD))
11558 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11559 *cost = insn_default_latency (dep) - 3;
11560 else
11561 *cost = insn_default_latency (dep);
11562 return false;
11564 else
11566 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11567 *cost = insn_default_latency (dep) + 1;
11568 else
11569 *cost = insn_default_latency (dep);
11571 return false;
11576 break;
11578 default:
11579 gcc_unreachable ();
11582 return true;
11585 /* Adjust cost hook for FA726TE. */
11586 static bool
11587 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11589 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11590 by a predicated one) has a penalty of 3. */
11591 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11592 && recog_memoized (insn) >= 0
11593 && recog_memoized (dep) >= 0
11594 && get_attr_conds (dep) == CONDS_SET)
11596 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11597 if (get_attr_conds (insn) == CONDS_USE
11598 && get_attr_type (insn) != TYPE_BRANCH)
11600 *cost = 3;
11601 return false;
11604 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11605 || get_attr_conds (insn) == CONDS_USE)
11607 *cost = 0;
11608 return false;
11612 return true;
11615 /* Implement TARGET_REGISTER_MOVE_COST.
11617 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11618 it is typically more expensive than a single memory access. We set
11619 the cost to less than that of two memory accesses so that floating
11620 point to integer conversion does not go through memory. */
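/* As an illustrative cross-check: with the TARGET_32BIT memory move cost
   of 10 used by arm_memory_move_cost below, the cost of 15 chosen here for
   VFP<->core moves is indeed cheaper than the 20 that a spill and reload
   would cost.  */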
11623 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11624 reg_class_t from, reg_class_t to)
11626 if (TARGET_32BIT)
11628 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11629 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11630 return 15;
11631 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11632 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11633 return 4;
11634 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11635 return 20;
11636 else
11637 return 2;
11639 else
11641 if (from == HI_REGS || to == HI_REGS)
11642 return 4;
11643 else
11644 return 2;
11648 /* Implement TARGET_MEMORY_MOVE_COST. */
11651 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11652 bool in ATTRIBUTE_UNUSED)
11654 if (TARGET_32BIT)
11655 return 10;
11656 else
11658 if (GET_MODE_SIZE (mode) < 4)
11659 return 8;
11660 else
11661 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
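/* Worked example (illustrative): on Thumb-1 (!TARGET_32BIT), moving an
   SImode value to or from memory is costed as 2 * 4 * 1 = 8 for LO_REGS
   and 2 * 4 * 2 = 16 for any other class, while sub-word modes cost a
   flat 8.  */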
11665 /* Vectorizer cost model implementation. */
11667 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11668 static int
11669 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11670 tree vectype,
11671 int misalign ATTRIBUTE_UNUSED)
11673 unsigned elements;
11675 switch (type_of_cost)
11677 case scalar_stmt:
11678 return current_tune->vec_costs->scalar_stmt_cost;
11680 case scalar_load:
11681 return current_tune->vec_costs->scalar_load_cost;
11683 case scalar_store:
11684 return current_tune->vec_costs->scalar_store_cost;
11686 case vector_stmt:
11687 return current_tune->vec_costs->vec_stmt_cost;
11689 case vector_load:
11690 return current_tune->vec_costs->vec_align_load_cost;
11692 case vector_store:
11693 return current_tune->vec_costs->vec_store_cost;
11695 case vec_to_scalar:
11696 return current_tune->vec_costs->vec_to_scalar_cost;
11698 case scalar_to_vec:
11699 return current_tune->vec_costs->scalar_to_vec_cost;
11701 case unaligned_load:
11702 return current_tune->vec_costs->vec_unalign_load_cost;
11704 case unaligned_store:
11705 return current_tune->vec_costs->vec_unalign_store_cost;
11707 case cond_branch_taken:
11708 return current_tune->vec_costs->cond_taken_branch_cost;
11710 case cond_branch_not_taken:
11711 return current_tune->vec_costs->cond_not_taken_branch_cost;
11713 case vec_perm:
11714 case vec_promote_demote:
11715 return current_tune->vec_costs->vec_stmt_cost;
11717 case vec_construct:
11718 elements = TYPE_VECTOR_SUBPARTS (vectype);
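/* E.g. constructing a four-element vector is costed as 4 / 2 + 1 = 3.  */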
11719 return elements / 2 + 1;
11721 default:
11722 gcc_unreachable ();
11726 /* Implement targetm.vectorize.add_stmt_cost. */
11728 static unsigned
11729 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11730 struct _stmt_vec_info *stmt_info, int misalign,
11731 enum vect_cost_model_location where)
11733 unsigned *cost = (unsigned *) data;
11734 unsigned retval = 0;
11736 if (flag_vect_cost_model)
11738 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11739 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11741 /* Statements in an inner loop relative to the loop being
11742 vectorized are weighted more heavily. The value here is
11743 arbitrary and could potentially be improved with analysis. */
11744 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11745 count *= 50; /* FIXME. */
11747 retval = (unsigned) (count * stmt_cost);
11748 cost[where] += retval;
11751 return retval;
11754 /* Return true if and only if this insn can dual-issue only as older. */
11755 static bool
11756 cortexa7_older_only (rtx_insn *insn)
11758 if (recog_memoized (insn) < 0)
11759 return false;
11761 switch (get_attr_type (insn))
11763 case TYPE_ALU_DSP_REG:
11764 case TYPE_ALU_SREG:
11765 case TYPE_ALUS_SREG:
11766 case TYPE_LOGIC_REG:
11767 case TYPE_LOGICS_REG:
11768 case TYPE_ADC_REG:
11769 case TYPE_ADCS_REG:
11770 case TYPE_ADR:
11771 case TYPE_BFM:
11772 case TYPE_REV:
11773 case TYPE_MVN_REG:
11774 case TYPE_SHIFT_IMM:
11775 case TYPE_SHIFT_REG:
11776 case TYPE_LOAD_BYTE:
11777 case TYPE_LOAD1:
11778 case TYPE_STORE1:
11779 case TYPE_FFARITHS:
11780 case TYPE_FADDS:
11781 case TYPE_FFARITHD:
11782 case TYPE_FADDD:
11783 case TYPE_FMOV:
11784 case TYPE_F_CVT:
11785 case TYPE_FCMPS:
11786 case TYPE_FCMPD:
11787 case TYPE_FCONSTS:
11788 case TYPE_FCONSTD:
11789 case TYPE_FMULS:
11790 case TYPE_FMACS:
11791 case TYPE_FMULD:
11792 case TYPE_FMACD:
11793 case TYPE_FDIVS:
11794 case TYPE_FDIVD:
11795 case TYPE_F_MRC:
11796 case TYPE_F_MRRC:
11797 case TYPE_F_FLAG:
11798 case TYPE_F_LOADS:
11799 case TYPE_F_STORES:
11800 return true;
11801 default:
11802 return false;
11806 /* Return true if and only if this insn can dual-issue as younger. */
11807 static bool
11808 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11810 if (recog_memoized (insn) < 0)
11812 if (verbose > 5)
11813 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11814 return false;
11817 switch (get_attr_type (insn))
11819 case TYPE_ALU_IMM:
11820 case TYPE_ALUS_IMM:
11821 case TYPE_LOGIC_IMM:
11822 case TYPE_LOGICS_IMM:
11823 case TYPE_EXTEND:
11824 case TYPE_MVN_IMM:
11825 case TYPE_MOV_IMM:
11826 case TYPE_MOV_REG:
11827 case TYPE_MOV_SHIFT:
11828 case TYPE_MOV_SHIFT_REG:
11829 case TYPE_BRANCH:
11830 case TYPE_CALL:
11831 return true;
11832 default:
11833 return false;
11838 /* Look for an instruction that can dual issue only as an older
11839 instruction, and move it in front of any instructions that can
11840 dual-issue as younger, while preserving the relative order of all
11841 other instructions in the ready list. This is a heuristic to help
11842 dual-issue in later cycles, by postponing issue of more flexible
11843 instructions. This heuristic may affect dual issue opportunities
11844 in the current cycle. */
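/* A sketch of the effect (the instruction choice is illustrative, not from
   the original sources): if the insn at the head of the ready list is a
   MOV Rd, #imm (younger-capable per cortexa7_younger) and the next one is an
   ADD Rd, Rn, Rm (older-only per cortexa7_older_only), the reordering below
   moves the ADD in front of the MOV, so the older-only insn issues first and
   the more flexible MOV stays available to pair as the younger half of a
   dual-issue slot in a later cycle.  */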
11845 static void
11846 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11847 int *n_readyp, int clock)
11849 int i;
11850 int first_older_only = -1, first_younger = -1;
11852 if (verbose > 5)
11853 fprintf (file,
11854 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11855 clock,
11856 *n_readyp);
11858 /* Traverse the ready list from the head (the instruction to issue
11859 first), looking for the first instruction that can issue as
11860 younger and the first instruction that can dual-issue only as
11861 older. */
11862 for (i = *n_readyp - 1; i >= 0; i--)
11864 rtx_insn *insn = ready[i];
11865 if (cortexa7_older_only (insn))
11867 first_older_only = i;
11868 if (verbose > 5)
11869 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11870 break;
11872 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11873 first_younger = i;
11876 /* Nothing to reorder because either no younger insn was found, or an
11877 insn that can dual-issue only as older already appears before any
11878 insn that can dual-issue as younger. */
11879 if (first_younger == -1)
11881 if (verbose > 5)
11882 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11883 return;
11886 /* Nothing to reorder because no older-only insn in the ready list. */
11887 if (first_older_only == -1)
11889 if (verbose > 5)
11890 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11891 return;
11894 /* Move first_older_only insn before first_younger. */
11895 if (verbose > 5)
11896 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11897 INSN_UID(ready [first_older_only]),
11898 INSN_UID(ready [first_younger]));
11899 rtx_insn *first_older_only_insn = ready [first_older_only];
11900 for (i = first_older_only; i < first_younger; i++)
11902 ready[i] = ready[i+1];
11905 ready[i] = first_older_only_insn;
11906 return;
11909 /* Implement TARGET_SCHED_REORDER. */
11910 static int
11911 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11912 int clock)
11914 switch (arm_tune)
11916 case cortexa7:
11917 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11918 break;
11919 default:
11920 /* Do nothing for other cores. */
11921 break;
11924 return arm_issue_rate ();
11927 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11928 It corrects the value of COST based on the relationship between
11929 INSN and DEP through the dependence LINK. It returns the new
11930 value. There is a per-core adjust_cost hook to adjust scheduler costs
11931 and the per-core hook can choose to completely override the generic
11932 adjust_cost function. Only put bits of code into arm_adjust_cost that
11933 are common across all cores. */
11934 static int
11935 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11937 rtx i_pat, d_pat;
11939 /* When generating Thumb-1 code, we want to place flag-setting operations
11940 close to a conditional branch which depends on them, so that we can
11941 omit the comparison. */
11942 if (TARGET_THUMB1
11943 && REG_NOTE_KIND (link) == 0
11944 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11945 && recog_memoized (dep) >= 0
11946 && get_attr_conds (dep) == CONDS_SET)
11947 return 0;
11949 if (current_tune->sched_adjust_cost != NULL)
11951 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11952 return cost;
11955 /* XXX Is this strictly true? */
11956 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11957 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11958 return 0;
11960 /* Call insns don't incur a stall, even if they follow a load. */
11961 if (REG_NOTE_KIND (link) == 0
11962 && CALL_P (insn))
11963 return 1;
11965 if ((i_pat = single_set (insn)) != NULL
11966 && MEM_P (SET_SRC (i_pat))
11967 && (d_pat = single_set (dep)) != NULL
11968 && MEM_P (SET_DEST (d_pat)))
11970 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11971 /* This is a load after a store; there is no conflict if the load reads
11972 from a cached area. Assume that loads from the stack, and from the
11973 constant pool are cached, and that others will miss. This is a
11974 hack. */
11976 if ((GET_CODE (src_mem) == SYMBOL_REF
11977 && CONSTANT_POOL_ADDRESS_P (src_mem))
11978 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11979 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11980 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11981 return 1;
11984 return cost;
11988 arm_max_conditional_execute (void)
11990 return max_insns_skipped;
11993 static int
11994 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11996 if (TARGET_32BIT)
11997 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11998 else
11999 return (optimize > 0) ? 2 : 0;
12002 static int
12003 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12005 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12008 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12009 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12010 sequences of non-executed instructions in IT blocks probably take the same
12011 amount of time as executed instructions (and the IT instruction itself takes
12012 space in icache). This function was experimentally determined to give good
12013 results on a popular embedded benchmark. */
12015 static int
12016 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12018 return (TARGET_32BIT && speed_p) ? 1
12019 : arm_default_branch_cost (speed_p, predictable_p);
12022 static bool fp_consts_inited = false;
12024 static REAL_VALUE_TYPE value_fp0;
12026 static void
12027 init_fp_table (void)
12029 REAL_VALUE_TYPE r;
12031 r = REAL_VALUE_ATOF ("0", DFmode);
12032 value_fp0 = r;
12033 fp_consts_inited = true;
12036 /* Return TRUE if rtx X is a valid immediate FP constant. */
12038 arm_const_double_rtx (rtx x)
12040 REAL_VALUE_TYPE r;
12042 if (!fp_consts_inited)
12043 init_fp_table ();
12045 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12046 if (REAL_VALUE_MINUS_ZERO (r))
12047 return 0;
12049 if (REAL_VALUES_EQUAL (r, value_fp0))
12050 return 1;
12052 return 0;
12055 /* VFPv3 has a fairly wide range of representable immediates, formed from
12056 "quarter-precision" floating-point values. These can be evaluated using this
12057 formula (with ^ for exponentiation):
12059 (-1)^s * n * 2^(-r)
12061 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12062 16 <= n <= 31 and 0 <= r <= 7.
12064 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12066 - A (most-significant) is the sign bit.
12067 - BCD are the exponent (encoded as r XOR 3).
12068 - EFGH are the mantissa (encoded as n - 16).
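/* As a worked example of the encoding above: 1.0 = 16 * 2^-4, so s = 0,
   n = 16 and r = 4.  That gives A = 0, BCD = (4 XOR 3) = 0b111 and
   EFGH = (16 - 16) = 0b0000, i.e. the 8-bit value 0b01110000 (0x70),
   which is the index returned below for the constant 1.0.  */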
12071 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12072 fconst[sd] instruction, or -1 if X isn't suitable. */
12073 static int
12074 vfp3_const_double_index (rtx x)
12076 REAL_VALUE_TYPE r, m;
12077 int sign, exponent;
12078 unsigned HOST_WIDE_INT mantissa, mant_hi;
12079 unsigned HOST_WIDE_INT mask;
12080 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12081 bool fail;
12083 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12084 return -1;
12086 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12088 /* We can't represent these things, so detect them first. */
12089 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12090 return -1;
12092 /* Extract sign, exponent and mantissa. */
12093 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12094 r = real_value_abs (&r);
12095 exponent = REAL_EXP (&r);
12096 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12097 highest (sign) bit, with a fixed binary point at bit point_pos.
12098 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12099 bits for the mantissa, this may fail (low bits would be lost). */
12100 real_ldexp (&m, &r, point_pos - exponent);
12101 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12102 mantissa = w.elt (0);
12103 mant_hi = w.elt (1);
12105 /* If there are bits set in the low part of the mantissa, we can't
12106 represent this value. */
12107 if (mantissa != 0)
12108 return -1;
12110 /* Now make it so that mantissa contains the most-significant bits, and move
12111 the point_pos to indicate that the least-significant bits have been
12112 discarded. */
12113 point_pos -= HOST_BITS_PER_WIDE_INT;
12114 mantissa = mant_hi;
12116 /* We can permit four significant bits of mantissa only, plus a high bit
12117 which is always 1. */
12118 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12119 if ((mantissa & mask) != 0)
12120 return -1;
12122 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12123 mantissa >>= point_pos - 5;
12125 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12126 floating-point immediate zero with Neon using an integer-zero load, but
12127 that case is handled elsewhere.) */
12128 if (mantissa == 0)
12129 return -1;
12131 gcc_assert (mantissa >= 16 && mantissa <= 31);
12133 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12134 normalized significands are in the range [1, 2). (Our mantissa is shifted
12135 left 4 places at this point relative to normalized IEEE754 values). GCC
12136 internally uses [0.5, 1) (see real.c), so the exponent returned from
12137 REAL_EXP must be altered. */
12138 exponent = 5 - exponent;
12140 if (exponent < 0 || exponent > 7)
12141 return -1;
12143 /* Sign, mantissa and exponent are now in the correct form to plug into the
12144 formula described in the comment above. */
12145 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12148 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12150 vfp3_const_double_rtx (rtx x)
12152 if (!TARGET_VFP3)
12153 return 0;
12155 return vfp3_const_double_index (x) != -1;
12158 /* Recognize immediates which can be used in various Neon instructions. Legal
12159 immediates are described by the following table (for VMVN variants, the
12160 bitwise inverse of the constant shown is recognized. In either case, VMOV
12161 is output and the correct instruction to use for a given constant is chosen
12162 by the assembler). The constant shown is replicated across all elements of
12163 the destination vector.
12165 insn elems variant constant (binary)
12166 ---- ----- ------- -----------------
12167 vmov i32 0 00000000 00000000 00000000 abcdefgh
12168 vmov i32 1 00000000 00000000 abcdefgh 00000000
12169 vmov i32 2 00000000 abcdefgh 00000000 00000000
12170 vmov i32 3 abcdefgh 00000000 00000000 00000000
12171 vmov i16 4 00000000 abcdefgh
12172 vmov i16 5 abcdefgh 00000000
12173 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12174 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12175 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12176 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12177 vmvn i16 10 00000000 abcdefgh
12178 vmvn i16 11 abcdefgh 00000000
12179 vmov i32 12 00000000 00000000 abcdefgh 11111111
12180 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12181 vmov i32 14 00000000 abcdefgh 11111111 11111111
12182 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12183 vmov i8 16 abcdefgh
12184 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12185 eeeeeeee ffffffff gggggggg hhhhhhhh
12186 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12187 vmov f32 19 00000000 00000000 00000000 00000000
12189 For case 18, B = !b. Representable values are exactly those accepted by
12190 vfp3_const_double_index, but are output as floating-point numbers rather
12191 than indices.
12193 For case 19, we will change it to vmov.i32 when assembling.
12195 Variants 0-5 (inclusive) may also be used as immediates for the second
12196 operand of VORR/VBIC instructions.
12198 The INVERSE argument causes the bitwise inverse of the given operand to be
12199 recognized instead (used for recognizing legal immediates for the VAND/VORN
12200 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12201 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12202 output, rather than the real insns vbic/vorr).
12204 INVERSE makes no difference to the recognition of float vectors.
12206 The return value is the variant of immediate as shown in the above table, or
12207 -1 if the given value doesn't match any of the listed patterns.
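/* For example, a V4SImode vector with every element equal to 255
   (0x000000ff) matches variant 0 with abcdefgh = 0xff and is output as a
   "vmov.i32" of that value, while a vector with every element equal to
   0xffffff00 is recognized through the VMVN form (variant 6).  */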
12209 static int
12210 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12211 rtx *modconst, int *elementwidth)
12213 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12214 matches = 1; \
12215 for (i = 0; i < idx; i += (STRIDE)) \
12216 if (!(TEST)) \
12217 matches = 0; \
12218 if (matches) \
12220 immtype = (CLASS); \
12221 elsize = (ELSIZE); \
12222 break; \
12225 unsigned int i, elsize = 0, idx = 0, n_elts;
12226 unsigned int innersize;
12227 unsigned char bytes[16];
12228 int immtype = -1, matches;
12229 unsigned int invmask = inverse ? 0xff : 0;
12230 bool vector = GET_CODE (op) == CONST_VECTOR;
12232 if (vector)
12234 n_elts = CONST_VECTOR_NUNITS (op);
12235 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12237 else
12239 n_elts = 1;
12240 if (mode == VOIDmode)
12241 mode = DImode;
12242 innersize = GET_MODE_SIZE (mode);
12245 /* Vectors of float constants. */
12246 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12248 rtx el0 = CONST_VECTOR_ELT (op, 0);
12249 REAL_VALUE_TYPE r0;
12251 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12252 return -1;
12254 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12256 for (i = 1; i < n_elts; i++)
12258 rtx elt = CONST_VECTOR_ELT (op, i);
12259 REAL_VALUE_TYPE re;
12261 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12263 if (!REAL_VALUES_EQUAL (r0, re))
12264 return -1;
12267 if (modconst)
12268 *modconst = CONST_VECTOR_ELT (op, 0);
12270 if (elementwidth)
12271 *elementwidth = 0;
12273 if (el0 == CONST0_RTX (GET_MODE (el0)))
12274 return 19;
12275 else
12276 return 18;
12279 /* Splat vector constant out into a byte vector. */
12280 for (i = 0; i < n_elts; i++)
12282 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12283 unsigned HOST_WIDE_INT elpart;
12284 unsigned int part, parts;
12286 if (CONST_INT_P (el))
12288 elpart = INTVAL (el);
12289 parts = 1;
12291 else if (CONST_DOUBLE_P (el))
12293 elpart = CONST_DOUBLE_LOW (el);
12294 parts = 2;
12296 else
12297 gcc_unreachable ();
12299 for (part = 0; part < parts; part++)
12301 unsigned int byte;
12302 for (byte = 0; byte < innersize; byte++)
12304 bytes[idx++] = (elpart & 0xff) ^ invmask;
12305 elpart >>= BITS_PER_UNIT;
12307 if (CONST_DOUBLE_P (el))
12308 elpart = CONST_DOUBLE_HIGH (el);
12312 /* Sanity check. */
12313 gcc_assert (idx == GET_MODE_SIZE (mode));
12317 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12318 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12320 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12321 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12323 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12324 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12326 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12327 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12329 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12331 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12333 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12334 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12336 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12337 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12339 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12340 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12342 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12343 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12345 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12347 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12349 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12350 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12352 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12353 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12355 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12356 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12358 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12359 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12361 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12363 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12364 && bytes[i] == bytes[(i + 8) % idx]);
12366 while (0);
12368 if (immtype == -1)
12369 return -1;
12371 if (elementwidth)
12372 *elementwidth = elsize;
12374 if (modconst)
12376 unsigned HOST_WIDE_INT imm = 0;
12378 /* Un-invert bytes of recognized vector, if necessary. */
12379 if (invmask != 0)
12380 for (i = 0; i < idx; i++)
12381 bytes[i] ^= invmask;
12383 if (immtype == 17)
12385 /* FIXME: Broken on 32-bit H_W_I hosts. */
12386 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12388 for (i = 0; i < 8; i++)
12389 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12390 << (i * BITS_PER_UNIT);
12392 *modconst = GEN_INT (imm);
12394 else
12396 unsigned HOST_WIDE_INT imm = 0;
12398 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12399 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12401 *modconst = GEN_INT (imm);
12405 return immtype;
12406 #undef CHECK
12409 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12410 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12411 float elements), and a modified constant (whatever should be output for a
12412 VMOV) in *MODCONST. */
12415 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12416 rtx *modconst, int *elementwidth)
12418 rtx tmpconst;
12419 int tmpwidth;
12420 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12422 if (retval == -1)
12423 return 0;
12425 if (modconst)
12426 *modconst = tmpconst;
12428 if (elementwidth)
12429 *elementwidth = tmpwidth;
12431 return 1;
12434 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12435 the immediate is valid, write a constant suitable for using as an operand
12436 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12437 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12440 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12441 rtx *modconst, int *elementwidth)
12443 rtx tmpconst;
12444 int tmpwidth;
12445 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12447 if (retval < 0 || retval > 5)
12448 return 0;
12450 if (modconst)
12451 *modconst = tmpconst;
12453 if (elementwidth)
12454 *elementwidth = tmpwidth;
12456 return 1;
12459 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12460 the immediate is valid, write a constant suitable for using as an operand
12461 to VSHR/VSHL to *MODCONST and the corresponding element width to
12462 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12463 which have different limitations. */
12466 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12467 rtx *modconst, int *elementwidth,
12468 bool isleftshift)
12470 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12471 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12472 unsigned HOST_WIDE_INT last_elt = 0;
12473 unsigned HOST_WIDE_INT maxshift;
12475 /* Split vector constant out into a byte vector. */
12476 for (i = 0; i < n_elts; i++)
12478 rtx el = CONST_VECTOR_ELT (op, i);
12479 unsigned HOST_WIDE_INT elpart;
12481 if (CONST_INT_P (el))
12482 elpart = INTVAL (el);
12483 else if (CONST_DOUBLE_P (el))
12484 return 0;
12485 else
12486 gcc_unreachable ();
12488 if (i != 0 && elpart != last_elt)
12489 return 0;
12491 last_elt = elpart;
12494 /* Shift less than element size. */
12495 maxshift = innersize * 8;
12497 if (isleftshift)
12499 /* Left shift immediate value can be from 0 to <size>-1. */
12500 if (last_elt >= maxshift)
12501 return 0;
12503 else
12505 /* Right shift immediate value can be from 1 to <size>. */
12506 if (last_elt == 0 || last_elt > maxshift)
12507 return 0;
12510 if (elementwidth)
12511 *elementwidth = innersize * 8;
12513 if (modconst)
12514 *modconst = CONST_VECTOR_ELT (op, 0);
12516 return 1;
12519 /* Return a string suitable for output of Neon immediate logic operation
12520 MNEM. */
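/* For example, with MNEM "vorr", a 32-bit element width and QUAD set,
   the template built below is "vorr.i32\t%q0, %2"; the D-register form
   uses %P0 instead of %q0.  */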
12522 char *
12523 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12524 int inverse, int quad)
12526 int width, is_valid;
12527 static char templ[40];
12529 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12531 gcc_assert (is_valid != 0);
12533 if (quad)
12534 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12535 else
12536 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12538 return templ;
12541 /* Return a string suitable for output of Neon immediate shift operation
12542 (VSHR or VSHL) MNEM. */
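/* For example, with MNEM "vshr", SIGN 's', a 32-bit element width and
   QUAD set, the template built below is "vshr.s32\t%q0, %q1, %2".  */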
12544 char *
12545 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12546 machine_mode mode, int quad,
12547 bool isleftshift)
12549 int width, is_valid;
12550 static char templ[40];
12552 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12553 gcc_assert (is_valid != 0);
12555 if (quad)
12556 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12557 else
12558 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12560 return templ;
12563 /* Output a sequence of pairwise operations to implement a reduction.
12564 NOTE: We do "too much work" here, because pairwise operations work on two
12565 registers-worth of operands in one go. Unfortunately we don't think we can
12566 exploit those extra calculations to do the full operation in fewer steps.
12567 Although all vector elements of the result but the first are ignored, we
12568 actually calculate the same result in each of the elements. An alternative
12569 such as initially loading a vector with zero to use as each of the second
12570 operands would use up an additional register and take an extra instruction,
12571 for no particular gain. */
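/* For instance, reducing a four-element vector takes two pairwise steps
   (4 -> 2 -> 1 useful elements): the loop below runs with i = 2 and then
   i = 1, and as noted above every lane of OP0 ends up holding the same
   final value.  */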
12573 void
12574 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12575 rtx (*reduc) (rtx, rtx, rtx))
12577 machine_mode inner = GET_MODE_INNER (mode);
12578 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12579 rtx tmpsum = op1;
12581 for (i = parts / 2; i >= 1; i /= 2)
12583 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12584 emit_insn (reduc (dest, tmpsum, tmpsum));
12585 tmpsum = dest;
12589 /* If VALS is a vector constant that can be loaded into a register
12590 using VDUP, generate instructions to do so and return an RTX to
12591 assign to the register. Otherwise return NULL_RTX. */
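/* For example, given the V4SImode constant {42, 42, 42, 42}, the code
   below copies 42 into a core register and returns
   (vec_duplicate:V4SI (reg:SI ...)), which can then be loaded with a
   single VDUP as described in the comment inside the function.  */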
12593 static rtx
12594 neon_vdup_constant (rtx vals)
12596 machine_mode mode = GET_MODE (vals);
12597 machine_mode inner_mode = GET_MODE_INNER (mode);
12598 int n_elts = GET_MODE_NUNITS (mode);
12599 bool all_same = true;
12600 rtx x;
12601 int i;
12603 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12604 return NULL_RTX;
12606 for (i = 0; i < n_elts; ++i)
12608 x = XVECEXP (vals, 0, i);
12609 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12610 all_same = false;
12613 if (!all_same)
12614 /* The elements are not all the same. We could handle repeating
12615 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12616 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12617 vdup.i16). */
12618 return NULL_RTX;
12620 /* We can load this constant by using VDUP and a constant in a
12621 single ARM register. This will be cheaper than a vector
12622 load. */
12624 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12625 return gen_rtx_VEC_DUPLICATE (mode, x);
12628 /* Generate code to load VALS, which is a PARALLEL containing only
12629 constants (for vec_init) or CONST_VECTOR, efficiently into a
12630 register. Returns an RTX to copy into the register, or NULL_RTX
12631 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12634 neon_make_constant (rtx vals)
12636 machine_mode mode = GET_MODE (vals);
12637 rtx target;
12638 rtx const_vec = NULL_RTX;
12639 int n_elts = GET_MODE_NUNITS (mode);
12640 int n_const = 0;
12641 int i;
12643 if (GET_CODE (vals) == CONST_VECTOR)
12644 const_vec = vals;
12645 else if (GET_CODE (vals) == PARALLEL)
12647 /* A CONST_VECTOR must contain only CONST_INTs and
12648 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12649 Only store valid constants in a CONST_VECTOR. */
12650 for (i = 0; i < n_elts; ++i)
12652 rtx x = XVECEXP (vals, 0, i);
12653 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12654 n_const++;
12656 if (n_const == n_elts)
12657 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12659 else
12660 gcc_unreachable ();
12662 if (const_vec != NULL
12663 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12664 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12665 return const_vec;
12666 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12667 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12668 pipeline cycle; creating the constant takes one or two ARM
12669 pipeline cycles. */
12670 return target;
12671 else if (const_vec != NULL_RTX)
12672 /* Load from constant pool. On Cortex-A8 this takes two cycles
12673 (for either double or quad vectors). We cannot take advantage
12674 of single-cycle VLD1 because we need a PC-relative addressing
12675 mode. */
12676 return const_vec;
12677 else
12678 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12679 We cannot construct an initializer. */
12680 return NULL_RTX;
12683 /* Initialize vector TARGET to VALS. */
12685 void
12686 neon_expand_vector_init (rtx target, rtx vals)
12688 machine_mode mode = GET_MODE (target);
12689 machine_mode inner_mode = GET_MODE_INNER (mode);
12690 int n_elts = GET_MODE_NUNITS (mode);
12691 int n_var = 0, one_var = -1;
12692 bool all_same = true;
12693 rtx x, mem;
12694 int i;
12696 for (i = 0; i < n_elts; ++i)
12698 x = XVECEXP (vals, 0, i);
12699 if (!CONSTANT_P (x))
12700 ++n_var, one_var = i;
12702 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12703 all_same = false;
12706 if (n_var == 0)
12708 rtx constant = neon_make_constant (vals);
12709 if (constant != NULL_RTX)
12711 emit_move_insn (target, constant);
12712 return;
12716 /* Splat a single non-constant element if we can. */
12717 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12719 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12720 emit_insn (gen_rtx_SET (VOIDmode, target,
12721 gen_rtx_VEC_DUPLICATE (mode, x)));
12722 return;
12725 /* One field is non-constant. Load constant then overwrite varying
12726 field. This is more efficient than using the stack. */
12727 if (n_var == 1)
12729 rtx copy = copy_rtx (vals);
12730 rtx index = GEN_INT (one_var);
12732 /* Load constant part of vector, substitute neighboring value for
12733 varying element. */
12734 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12735 neon_expand_vector_init (target, copy);
12737 /* Insert variable. */
12738 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12739 switch (mode)
12741 case V8QImode:
12742 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12743 break;
12744 case V16QImode:
12745 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12746 break;
12747 case V4HImode:
12748 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12749 break;
12750 case V8HImode:
12751 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12752 break;
12753 case V2SImode:
12754 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12755 break;
12756 case V4SImode:
12757 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12758 break;
12759 case V2SFmode:
12760 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12761 break;
12762 case V4SFmode:
12763 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12764 break;
12765 case V2DImode:
12766 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12767 break;
12768 default:
12769 gcc_unreachable ();
12771 return;
12774 /* Construct the vector in memory one field at a time
12775 and load the whole vector. */
12776 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12777 for (i = 0; i < n_elts; i++)
12778 emit_move_insn (adjust_address_nv (mem, inner_mode,
12779 i * GET_MODE_SIZE (inner_mode)),
12780 XVECEXP (vals, 0, i));
12781 emit_move_insn (target, mem);
12784 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12785 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12786 reported source locations are bogus. */
12788 static void
12789 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12790 const char *err)
12792 HOST_WIDE_INT lane;
12794 gcc_assert (CONST_INT_P (operand));
12796 lane = INTVAL (operand);
12798 if (lane < low || lane >= high)
12799 error (err);
12802 /* Bounds-check lanes. */
12804 void
12805 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12807 bounds_check (operand, low, high, "lane out of range");
12810 /* Bounds-check constants. */
12812 void
12813 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12815 bounds_check (operand, low, high, "constant out of range");
12818 HOST_WIDE_INT
12819 neon_element_bits (machine_mode mode)
12821 if (mode == DImode)
12822 return GET_MODE_BITSIZE (mode);
12823 else
12824 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12828 /* Predicates for `match_operand' and `match_operator'. */
12830 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12831 WB is true if full writeback address modes are allowed and is false
12832 if limited writeback address modes (POST_INC and PRE_DEC) are
12833 allowed. */
12836 arm_coproc_mem_operand (rtx op, bool wb)
12838 rtx ind;
12840 /* Reject eliminable registers. */
12841 if (! (reload_in_progress || reload_completed || lra_in_progress)
12842 && ( reg_mentioned_p (frame_pointer_rtx, op)
12843 || reg_mentioned_p (arg_pointer_rtx, op)
12844 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12845 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12846 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12847 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12848 return FALSE;
12850 /* Constants are converted into offsets from labels. */
12851 if (!MEM_P (op))
12852 return FALSE;
12854 ind = XEXP (op, 0);
12856 if (reload_completed
12857 && (GET_CODE (ind) == LABEL_REF
12858 || (GET_CODE (ind) == CONST
12859 && GET_CODE (XEXP (ind, 0)) == PLUS
12860 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12861 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12862 return TRUE;
12864 /* Match: (mem (reg)). */
12865 if (REG_P (ind))
12866 return arm_address_register_rtx_p (ind, 0);
12868 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12869 acceptable in any case (subject to verification by
12870 arm_address_register_rtx_p). We need WB to be true to accept
12871 PRE_INC and POST_DEC. */
12872 if (GET_CODE (ind) == POST_INC
12873 || GET_CODE (ind) == PRE_DEC
12874 || (wb
12875 && (GET_CODE (ind) == PRE_INC
12876 || GET_CODE (ind) == POST_DEC)))
12877 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12879 if (wb
12880 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12881 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12882 && GET_CODE (XEXP (ind, 1)) == PLUS
12883 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12884 ind = XEXP (ind, 1);
12886 /* Match:
12887 (plus (reg)
12888 (const)). */
12889 if (GET_CODE (ind) == PLUS
12890 && REG_P (XEXP (ind, 0))
12891 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12892 && CONST_INT_P (XEXP (ind, 1))
12893 && INTVAL (XEXP (ind, 1)) > -1024
12894 && INTVAL (XEXP (ind, 1)) < 1024
12895 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12896 return TRUE;
12898 return FALSE;
12901 /* Return TRUE if OP is a memory operand which we can load or store a vector
12902 to/from. TYPE is one of the following values:
12903 0 - Vector load/store (vldr)
12904 1 - Core registers (ldm)
12905 2 - Element/structure loads (vld1)
12908 neon_vector_mem_operand (rtx op, int type, bool strict)
12910 rtx ind;
12912 /* Reject eliminable registers. */
12913 if (! (reload_in_progress || reload_completed)
12914 && ( reg_mentioned_p (frame_pointer_rtx, op)
12915 || reg_mentioned_p (arg_pointer_rtx, op)
12916 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12917 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12918 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12919 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12920 return !strict;
12922 /* Constants are converted into offsets from labels. */
12923 if (!MEM_P (op))
12924 return FALSE;
12926 ind = XEXP (op, 0);
12928 if (reload_completed
12929 && (GET_CODE (ind) == LABEL_REF
12930 || (GET_CODE (ind) == CONST
12931 && GET_CODE (XEXP (ind, 0)) == PLUS
12932 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12933 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12934 return TRUE;
12936 /* Match: (mem (reg)). */
12937 if (REG_P (ind))
12938 return arm_address_register_rtx_p (ind, 0);
12940 /* Allow post-increment with Neon registers. */
12941 if ((type != 1 && GET_CODE (ind) == POST_INC)
12942 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12943 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12945 /* Allow post-increment by register for VLDn. */
12946 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12947 && GET_CODE (XEXP (ind, 1)) == PLUS
12948 && REG_P (XEXP (XEXP (ind, 1), 1)))
12949 return true;
12951 /* Match:
12952 (plus (reg)
12953 (const)). */
12954 if (type == 0
12955 && GET_CODE (ind) == PLUS
12956 && REG_P (XEXP (ind, 0))
12957 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12958 && CONST_INT_P (XEXP (ind, 1))
12959 && INTVAL (XEXP (ind, 1)) > -1024
12960 /* For quad modes, we restrict the constant offset to be slightly less
12961 than what the instruction format permits. We have no such constraint
12962 on double mode offsets. (This must match arm_legitimate_index_p.) */
12963 && (INTVAL (XEXP (ind, 1))
12964 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12965 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12966 return TRUE;
12968 return FALSE;
12971 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12972 type. */
12974 neon_struct_mem_operand (rtx op)
12976 rtx ind;
12978 /* Reject eliminable registers. */
12979 if (! (reload_in_progress || reload_completed)
12980 && ( reg_mentioned_p (frame_pointer_rtx, op)
12981 || reg_mentioned_p (arg_pointer_rtx, op)
12982 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12983 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12984 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12985 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12986 return FALSE;
12988 /* Constants are converted into offsets from labels. */
12989 if (!MEM_P (op))
12990 return FALSE;
12992 ind = XEXP (op, 0);
12994 if (reload_completed
12995 && (GET_CODE (ind) == LABEL_REF
12996 || (GET_CODE (ind) == CONST
12997 && GET_CODE (XEXP (ind, 0)) == PLUS
12998 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12999 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13000 return TRUE;
13002 /* Match: (mem (reg)). */
13003 if (REG_P (ind))
13004 return arm_address_register_rtx_p (ind, 0);
13006 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13007 if (GET_CODE (ind) == POST_INC
13008 || GET_CODE (ind) == PRE_DEC)
13009 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13011 return FALSE;
13014 /* Return true if X is a register that will be eliminated later on. */
13016 arm_eliminable_register (rtx x)
13018 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13019 || REGNO (x) == ARG_POINTER_REGNUM
13020 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13021 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13024 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13025 coprocessor registers. Otherwise return NO_REGS. */
13027 enum reg_class
13028 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13030 if (mode == HFmode)
13032 if (!TARGET_NEON_FP16)
13033 return GENERAL_REGS;
13034 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13035 return NO_REGS;
13036 return GENERAL_REGS;
13039 /* The neon move patterns handle all legitimate vector and struct
13040 addresses. */
13041 if (TARGET_NEON
13042 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13043 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13044 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13045 || VALID_NEON_STRUCT_MODE (mode)))
13046 return NO_REGS;
13048 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13049 return NO_REGS;
13051 return GENERAL_REGS;
13054 /* Values which must be returned in the most-significant end of the return
13055 register. */
13057 static bool
13058 arm_return_in_msb (const_tree valtype)
13060 return (TARGET_AAPCS_BASED
13061 && BYTES_BIG_ENDIAN
13062 && (AGGREGATE_TYPE_P (valtype)
13063 || TREE_CODE (valtype) == COMPLEX_TYPE
13064 || FIXED_POINT_TYPE_P (valtype)));
13067 /* Return TRUE if X references a SYMBOL_REF. */
13069 symbol_mentioned_p (rtx x)
13071 const char * fmt;
13072 int i;
13074 if (GET_CODE (x) == SYMBOL_REF)
13075 return 1;
13077 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13078 are constant offsets, not symbols. */
13079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13080 return 0;
13082 fmt = GET_RTX_FORMAT (GET_CODE (x));
13084 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13086 if (fmt[i] == 'E')
13088 int j;
13090 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13091 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13092 return 1;
13094 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13095 return 1;
13098 return 0;
13101 /* Return TRUE if X references a LABEL_REF. */
13103 label_mentioned_p (rtx x)
13105 const char * fmt;
13106 int i;
13108 if (GET_CODE (x) == LABEL_REF)
13109 return 1;
13111 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13112 instruction, but they are constant offsets, not symbols. */
13113 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13114 return 0;
13116 fmt = GET_RTX_FORMAT (GET_CODE (x));
13117 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13119 if (fmt[i] == 'E')
13121 int j;
13123 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13124 if (label_mentioned_p (XVECEXP (x, i, j)))
13125 return 1;
13127 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13128 return 1;
13131 return 0;
13135 tls_mentioned_p (rtx x)
13137 switch (GET_CODE (x))
13139 case CONST:
13140 return tls_mentioned_p (XEXP (x, 0));
13142 case UNSPEC:
13143 if (XINT (x, 1) == UNSPEC_TLS)
13144 return 1;
13146 default:
13147 return 0;
13151 /* Must not copy any rtx that uses a pc-relative address. */
13153 static bool
13154 arm_cannot_copy_insn_p (rtx_insn *insn)
13156 /* The tls call insn cannot be copied, as it is paired with a data
13157 word. */
13158 if (recog_memoized (insn) == CODE_FOR_tlscall)
13159 return true;
13161 subrtx_iterator::array_type array;
13162 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13164 const_rtx x = *iter;
13165 if (GET_CODE (x) == UNSPEC
13166 && (XINT (x, 1) == UNSPEC_PIC_BASE
13167 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13168 return true;
13170 return false;
13173 enum rtx_code
13174 minmax_code (rtx x)
13176 enum rtx_code code = GET_CODE (x);
13178 switch (code)
13180 case SMAX:
13181 return GE;
13182 case SMIN:
13183 return LE;
13184 case UMIN:
13185 return LEU;
13186 case UMAX:
13187 return GEU;
13188 default:
13189 gcc_unreachable ();
13193 /* Match pair of min/max operators that can be implemented via usat/ssat. */
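/* For example, clamping to [0, 255] (LO_BOUND 0, HI_BOUND 255) matches
   the unsigned form with *MASK = 8 (a usat with saturation width 8),
   while clamping to [-128, 127] matches the signed form, also with
   *MASK = 8 (an ssat with saturation width 8).  */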
13195 bool
13196 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13197 int *mask, bool *signed_sat)
13199 /* The high bound must be a power of two minus one. */
13200 int log = exact_log2 (INTVAL (hi_bound) + 1);
13201 if (log == -1)
13202 return false;
13204 /* The low bound is either zero (for usat) or one less than the
13205 negation of the high bound (for ssat). */
13206 if (INTVAL (lo_bound) == 0)
13208 if (mask)
13209 *mask = log;
13210 if (signed_sat)
13211 *signed_sat = false;
13213 return true;
13216 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13218 if (mask)
13219 *mask = log + 1;
13220 if (signed_sat)
13221 *signed_sat = true;
13223 return true;
13226 return false;
13229 /* Return 1 if memory locations are adjacent. */
13231 adjacent_mem_locations (rtx a, rtx b)
13233 /* We don't guarantee to preserve the order of these memory refs. */
13234 if (volatile_refs_p (a) || volatile_refs_p (b))
13235 return 0;
13237 if ((REG_P (XEXP (a, 0))
13238 || (GET_CODE (XEXP (a, 0)) == PLUS
13239 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13240 && (REG_P (XEXP (b, 0))
13241 || (GET_CODE (XEXP (b, 0)) == PLUS
13242 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13244 HOST_WIDE_INT val0 = 0, val1 = 0;
13245 rtx reg0, reg1;
13246 int val_diff;
13248 if (GET_CODE (XEXP (a, 0)) == PLUS)
13250 reg0 = XEXP (XEXP (a, 0), 0);
13251 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13253 else
13254 reg0 = XEXP (a, 0);
13256 if (GET_CODE (XEXP (b, 0)) == PLUS)
13258 reg1 = XEXP (XEXP (b, 0), 0);
13259 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13261 else
13262 reg1 = XEXP (b, 0);
13264 /* Don't accept any offset that will require multiple
13265 instructions to handle, since this would cause the
13266 arith_adjacentmem pattern to output an overlong sequence. */
13267 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13268 return 0;
13270 /* Don't allow an eliminable register: register elimination can make
13271 the offset too large. */
13272 if (arm_eliminable_register (reg0))
13273 return 0;
13275 val_diff = val1 - val0;
13277 if (arm_ld_sched)
13279 /* If the target has load delay slots, then there's no benefit
13280 to using an ldm instruction unless the offset is zero and
13281 we are optimizing for size. */
13282 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13283 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13284 && (val_diff == 4 || val_diff == -4));
13287 return ((REGNO (reg0) == REGNO (reg1))
13288 && (val_diff == 4 || val_diff == -4));
13291 return 0;
13294 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13295 for load operations, false for store operations. CONSECUTIVE is true
13296 if the register numbers in the operation must be consecutive in the register
13297 bank. RETURN_PC is true if the value is to be loaded into PC.
13298 The pattern we are trying to match for load is:
13299 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13300 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13303 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13305 where
13306 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13307 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13308 3. If consecutive is TRUE, then for kth register being loaded,
13309 REGNO (R_dk) = REGNO (R_d0) + k.
13310 The pattern for store is similar. */
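/* A minimal sketch of the load form, roughly what "ldm r0, {r4, r5}"
   looks like (modes and details elided):
     (parallel [(set (reg r4) (mem (reg r0)))
                (set (reg r5) (mem (plus (reg r0) (const_int 4))))])
   Here offset is 0, so rule 1 applies, and REGNO increases down the
   list as rule 2 requires.  */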
13311 bool
13312 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13313 bool consecutive, bool return_pc)
13315 HOST_WIDE_INT count = XVECLEN (op, 0);
13316 rtx reg, mem, addr;
13317 unsigned regno;
13318 unsigned first_regno;
13319 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13320 rtx elt;
13321 bool addr_reg_in_reglist = false;
13322 bool update = false;
13323 int reg_increment;
13324 int offset_adj;
13325 int regs_per_val;
13327 /* If not in SImode, then registers must be consecutive
13328 (e.g., VLDM instructions for DFmode). */
13329 gcc_assert ((mode == SImode) || consecutive);
13330 /* Setting return_pc for stores is illegal. */
13331 gcc_assert (!return_pc || load);
13333 /* Set up the increments and the regs per val based on the mode. */
13334 reg_increment = GET_MODE_SIZE (mode);
13335 regs_per_val = reg_increment / 4;
13336 offset_adj = return_pc ? 1 : 0;
13338 if (count <= 1
13339 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13340 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13341 return false;
13343 /* Check if this is a write-back. */
13344 elt = XVECEXP (op, 0, offset_adj);
13345 if (GET_CODE (SET_SRC (elt)) == PLUS)
13347 i++;
13348 base = 1;
13349 update = true;
13351 /* The offset adjustment must be the number of registers being
13352 popped times the size of a single register. */
13353 if (!REG_P (SET_DEST (elt))
13354 || !REG_P (XEXP (SET_SRC (elt), 0))
13355 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13356 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13357 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13358 ((count - 1 - offset_adj) * reg_increment))
13359 return false;
13362 i = i + offset_adj;
13363 base = base + offset_adj;
13364 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13365 success depends on the type: VLDM can do just one reg,
13366 LDM must do at least two. */
13367 if ((count <= i) && (mode == SImode))
13368 return false;
13370 elt = XVECEXP (op, 0, i - 1);
13371 if (GET_CODE (elt) != SET)
13372 return false;
13374 if (load)
13376 reg = SET_DEST (elt);
13377 mem = SET_SRC (elt);
13379 else
13381 reg = SET_SRC (elt);
13382 mem = SET_DEST (elt);
13385 if (!REG_P (reg) || !MEM_P (mem))
13386 return false;
13388 regno = REGNO (reg);
13389 first_regno = regno;
13390 addr = XEXP (mem, 0);
13391 if (GET_CODE (addr) == PLUS)
13393 if (!CONST_INT_P (XEXP (addr, 1)))
13394 return false;
13396 offset = INTVAL (XEXP (addr, 1));
13397 addr = XEXP (addr, 0);
13400 if (!REG_P (addr))
13401 return false;
13403 /* Don't allow SP to be loaded unless it is also the base register. It
13404 guarantees that SP is reset correctly when an LDM instruction
13405 is interrupted. Otherwise, we might end up with a corrupt stack. */
13406 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13407 return false;
13409 for (; i < count; i++)
13411 elt = XVECEXP (op, 0, i);
13412 if (GET_CODE (elt) != SET)
13413 return false;
13415 if (load)
13417 reg = SET_DEST (elt);
13418 mem = SET_SRC (elt);
13420 else
13422 reg = SET_SRC (elt);
13423 mem = SET_DEST (elt);
13426 if (!REG_P (reg)
13427 || GET_MODE (reg) != mode
13428 || REGNO (reg) <= regno
13429 || (consecutive
13430 && (REGNO (reg) !=
13431 (unsigned int) (first_regno + regs_per_val * (i - base))))
13432 /* Don't allow SP to be loaded unless it is also the base register. It
13433 guarantees that SP is reset correctly when an LDM instruction
13434 is interrupted. Otherwise, we might end up with a corrupt stack. */
13435 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13436 || !MEM_P (mem)
13437 || GET_MODE (mem) != mode
13438 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13439 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13440 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13441 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13442 offset + (i - base) * reg_increment))
13443 && (!REG_P (XEXP (mem, 0))
13444 || offset + (i - base) * reg_increment != 0)))
13445 return false;
13447 regno = REGNO (reg);
13448 if (regno == REGNO (addr))
13449 addr_reg_in_reglist = true;
13452 if (load)
13454 if (update && addr_reg_in_reglist)
13455 return false;
13457 /* For Thumb-1, address register is always modified - either by write-back
13458 or by explicit load. If the pattern does not describe an update,
13459 then the address register must be in the list of loaded registers. */
13460 if (TARGET_THUMB1)
13461 return update || addr_reg_in_reglist;
13464 return true;
13467 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13468 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13469 instruction. ADD_OFFSET is nonzero if the base address register needs
13470 to be modified with an add instruction before we can use it. */
13472 static bool
13473 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13474 int nops, HOST_WIDE_INT add_offset)
13476 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13477 if the offset isn't small enough. The reason 2 ldrs are faster
13478 is because these ARMs are able to do more than one cache access
13479 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13480 whilst the ARM8 has a double bandwidth cache. This means that
13481 these cores can do both an instruction fetch and a data fetch in
13482 a single cycle, so the trick of calculating the address into a
13483 scratch register (one of the result regs) and then doing a load
13484 multiple actually becomes slower (and no smaller in code size).
13485 That is the transformation
13487 ldr rd1, [rbase + offset]
13488 ldr rd2, [rbase + offset + 4]
13492 add rd1, rbase, offset
13493 ldmia rd1, {rd1, rd2}
13495 produces worse code -- '3 cycles + any stalls on rd2' instead of
13496 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13497 access per cycle, the first sequence could never complete in less
13498 than 6 cycles, whereas the ldm sequence would only take 5 and
13499 would make better use of sequential accesses if not hitting the
13500 cache.
13502 We cheat here and test 'arm_ld_sched' which we currently know to
13503 only be true for the ARM8, ARM9 and StrongARM. If this ever
13504 changes, then the test below needs to be reworked. */
13505 if (nops == 2 && arm_ld_sched && add_offset != 0)
13506 return false;
13508 /* XScale has load-store double instructions, but they have stricter
13509 alignment requirements than load-store multiple, so we cannot
13510 use them.
13512 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13513 the pipeline until completion.
13515 NREGS CYCLES
13516 1 3
13517 2 4
13518 3 5
13519 4 6
13521 An ldr instruction takes 1-3 cycles, but does not block the
13522 pipeline.
13524 NREGS CYCLES
13525 1 1-3
13526 2 2-6
13527 3 3-9
13528 4 4-12
13530 Best case ldr will always win. However, the more ldr instructions
13531 we issue, the less likely we are to be able to schedule them well.
13532 Using ldr instructions also increases code size.
13534 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13535 for counts of 3 or 4 regs. */
13536 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13537 return false;
13538 return true;
13541 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13542 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13543 an array ORDER which describes the sequence to use when accessing the
13544 offsets that produces an ascending order. In this sequence, each
13545 offset must be larger by exactly 4 than the previous one. ORDER[0]
13546 must have been filled in with the lowest offset by the caller.
13547 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13548 we use to verify that ORDER produces an ascending order of registers.
13549 Return true if it was possible to construct such an order, false if
13550 not. */
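/* For example, with NOPS = 4, UNSORTED_OFFSETS = {4, 12, 8, 0} and
   ORDER[0] = 3 (the index of the lowest offset, 0), the loop below
   fills ORDER with {3, 0, 2, 1}, i.e. the ascending chain 0, 4, 8, 12.  */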
13552 static bool
13553 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13554 int *unsorted_regs)
13556 int i;
13557 for (i = 1; i < nops; i++)
13559 int j;
13561 order[i] = order[i - 1];
13562 for (j = 0; j < nops; j++)
13563 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13565 /* We must find exactly one offset that is higher than the
13566 previous one by 4. */
13567 if (order[i] != order[i - 1])
13568 return false;
13569 order[i] = j;
13571 if (order[i] == order[i - 1])
13572 return false;
13573 /* The register numbers must be ascending. */
13574 if (unsorted_regs != NULL
13575 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13576 return false;
13578 return true;
13581 /* Used to determine in a peephole whether a sequence of load
13582 instructions can be changed into a load-multiple instruction.
13583 NOPS is the number of separate load instructions we are examining. The
13584 first NOPS entries in OPERANDS are the destination registers, the
13585 next NOPS entries are memory operands. If this function is
13586 successful, *BASE is set to the common base register of the memory
13587 accesses; *LOAD_OFFSET is set to the first memory location's offset
13588 from that base register.
13589 REGS is an array filled in with the destination register numbers.
13590 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13591 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13592 the sequence of registers in REGS matches the loads from ascending memory
13593 locations, and the function verifies that the register numbers are
13594 themselves ascending. If CHECK_REGS is false, the register numbers
13595 are stored in the order they are found in the operands. */
13596 static int
13597 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13598 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13600 int unsorted_regs[MAX_LDM_STM_OPS];
13601 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13602 int order[MAX_LDM_STM_OPS];
13603 rtx base_reg_rtx = NULL;
13604 int base_reg = -1;
13605 int i, ldm_case;
13607 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13608 easily extended if required. */
13609 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13611 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13613 /* Loop over the operands and check that the memory references are
13614 suitable (i.e. immediate offsets from the same base register). At
13615 the same time, extract the target register, and the memory
13616 offsets. */
13617 for (i = 0; i < nops; i++)
13619 rtx reg;
13620 rtx offset;
13622 /* Convert a subreg of a mem into the mem itself. */
13623 if (GET_CODE (operands[nops + i]) == SUBREG)
13624 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13626 gcc_assert (MEM_P (operands[nops + i]));
13628 /* Don't reorder volatile memory references; it doesn't seem worth
13629 looking for the case where the order is ok anyway. */
13630 if (MEM_VOLATILE_P (operands[nops + i]))
13631 return 0;
13633 offset = const0_rtx;
13635 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13636 || (GET_CODE (reg) == SUBREG
13637 && REG_P (reg = SUBREG_REG (reg))))
13638 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13639 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13640 || (GET_CODE (reg) == SUBREG
13641 && REG_P (reg = SUBREG_REG (reg))))
13642 && (CONST_INT_P (offset
13643 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13645 if (i == 0)
13647 base_reg = REGNO (reg);
13648 base_reg_rtx = reg;
13649 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13650 return 0;
13652 else if (base_reg != (int) REGNO (reg))
13653 /* Not addressed from the same base register. */
13654 return 0;
13656 unsorted_regs[i] = (REG_P (operands[i])
13657 ? REGNO (operands[i])
13658 : REGNO (SUBREG_REG (operands[i])));
13660 /* If it isn't an integer register, or if it overwrites the
13661 base register but isn't the last insn in the list, then
13662 we can't do this. */
13663 if (unsorted_regs[i] < 0
13664 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13665 || unsorted_regs[i] > 14
13666 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13667 return 0;
13669 /* Don't allow SP to be loaded unless it is also the base
13670 register. It guarantees that SP is reset correctly when
13671 an LDM instruction is interrupted. Otherwise, we might
13672 end up with a corrupt stack. */
13673 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13674 return 0;
13676 unsorted_offsets[i] = INTVAL (offset);
13677 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13678 order[0] = i;
13680 else
13681 /* Not a suitable memory address. */
13682 return 0;
13685 /* All the useful information has now been extracted from the
13686 operands into unsorted_regs and unsorted_offsets; additionally,
13687 order[0] has been set to the lowest offset in the list. Sort
13688 the offsets into order, verifying that they are adjacent, and
13689 check that the register numbers are ascending. */
13690 if (!compute_offset_order (nops, unsorted_offsets, order,
13691 check_regs ? unsorted_regs : NULL))
13692 return 0;
13694 if (saved_order)
13695 memcpy (saved_order, order, sizeof order);
13697 if (base)
13699 *base = base_reg;
13701 for (i = 0; i < nops; i++)
13702 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13704 *load_offset = unsorted_offsets[order[0]];
13707 if (TARGET_THUMB1
13708 && !peep2_reg_dead_p (nops, base_reg_rtx))
13709 return 0;
13711 if (unsorted_offsets[order[0]] == 0)
13712 ldm_case = 1; /* ldmia */
13713 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13714 ldm_case = 2; /* ldmib */
13715 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13716 ldm_case = 3; /* ldmda */
13717 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13718 ldm_case = 4; /* ldmdb */
13719 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13720 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13721 ldm_case = 5;
13722 else
13723 return 0;
13725 if (!multiple_operation_profitable_p (false, nops,
13726 ldm_case == 5
13727 ? unsorted_offsets[order[0]] : 0))
13728 return 0;
13730 return ldm_case;
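/* Illustrative summary (editorial note): with four loads from [rB, #0]
   through [rB, #12] the lowest sorted offset is 0 and case 1 (ldmia) is
   chosen; offsets #4..#16 give case 2 (ldmib, ARM state only); #-12..#0
   give case 3 (ldmda, ARM state only); #-16..#-4 give case 4 (ldmdb).
   Any other run of adjacent offsets whose lowest value (or its negation)
   is a valid ARM immediate falls into case 5, for which the caller first
   adds that offset into a register and then uses ldmia.  */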
13733 /* Used to determine in a peephole whether a sequence of store instructions can
13734 be changed into a store-multiple instruction.
13735 NOPS is the number of separate store instructions we are examining.
13736 NOPS_TOTAL is the total number of instructions recognized by the peephole
13737 pattern.
13738 The first NOPS entries in OPERANDS are the source registers, the next
13739 NOPS entries are memory operands. If this function is successful, *BASE is
13740 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13741 to the first memory location's offset from that base register. REGS is an
13742 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13743 likewise filled with the corresponding rtx's.
13744 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13745 numbers to an ascending order of stores.
13746 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13747 from ascending memory locations, and the function verifies that the register
13748 numbers are themselves ascending. If CHECK_REGS is false, the register
13749 numbers are stored in the order they are found in the operands. */
13750 static int
13751 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13752 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13753 HOST_WIDE_INT *load_offset, bool check_regs)
13755 int unsorted_regs[MAX_LDM_STM_OPS];
13756 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13757 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13758 int order[MAX_LDM_STM_OPS];
13759 int base_reg = -1;
13760 rtx base_reg_rtx = NULL;
13761 int i, stm_case;
13763 /* Write back of base register is currently only supported for Thumb 1. */
13764 int base_writeback = TARGET_THUMB1;
13766 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13767 easily extended if required. */
13768 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13770 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13772 /* Loop over the operands and check that the memory references are
13773 suitable (i.e. immediate offsets from the same base register). At
13774 the same time, extract the target register, and the memory
13775 offsets. */
13776 for (i = 0; i < nops; i++)
13778 rtx reg;
13779 rtx offset;
13781 /* Convert a subreg of a mem into the mem itself. */
13782 if (GET_CODE (operands[nops + i]) == SUBREG)
13783 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13785 gcc_assert (MEM_P (operands[nops + i]));
13787 /* Don't reorder volatile memory references; it doesn't seem worth
13788 looking for the case where the order is ok anyway. */
13789 if (MEM_VOLATILE_P (operands[nops + i]))
13790 return 0;
13792 offset = const0_rtx;
13794 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13795 || (GET_CODE (reg) == SUBREG
13796 && REG_P (reg = SUBREG_REG (reg))))
13797 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13798 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13799 || (GET_CODE (reg) == SUBREG
13800 && REG_P (reg = SUBREG_REG (reg))))
13801 && (CONST_INT_P (offset
13802 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13804 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13805 ? operands[i] : SUBREG_REG (operands[i]));
13806 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13808 if (i == 0)
13810 base_reg = REGNO (reg);
13811 base_reg_rtx = reg;
13812 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13813 return 0;
13815 else if (base_reg != (int) REGNO (reg))
13816 /* Not addressed from the same base register. */
13817 return 0;
13819 /* If it isn't an integer register, then we can't do this. */
13820 if (unsorted_regs[i] < 0
13821 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13822 /* The effects are unpredictable if the base register is
13823 both updated and stored. */
13824 || (base_writeback && unsorted_regs[i] == base_reg)
13825 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13826 || unsorted_regs[i] > 14)
13827 return 0;
13829 unsorted_offsets[i] = INTVAL (offset);
13830 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13831 order[0] = i;
13833 else
13834 /* Not a suitable memory address. */
13835 return 0;
13838 /* All the useful information has now been extracted from the
13839 operands into unsorted_regs and unsorted_offsets; additionally,
13840 order[0] has been set to the lowest offset in the list. Sort
13841 the offsets into order, verifying that they are adjacent, and
13842 check that the register numbers are ascending. */
13843 if (!compute_offset_order (nops, unsorted_offsets, order,
13844 check_regs ? unsorted_regs : NULL))
13845 return 0;
13847 if (saved_order)
13848 memcpy (saved_order, order, sizeof order);
13850 if (base)
13852 *base = base_reg;
13854 for (i = 0; i < nops; i++)
13856 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13857 if (reg_rtxs)
13858 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13861 *load_offset = unsorted_offsets[order[0]];
13864 if (TARGET_THUMB1
13865 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13866 return 0;
13868 if (unsorted_offsets[order[0]] == 0)
13869 stm_case = 1; /* stmia */
13870 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13871 stm_case = 2; /* stmib */
13872 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13873 stm_case = 3; /* stmda */
13874 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13875 stm_case = 4; /* stmdb */
13876 else
13877 return 0;
13879 if (!multiple_operation_profitable_p (false, nops, 0))
13880 return 0;
13882 return stm_case;
13885 /* Routines for use in generating RTL. */
13887 /* Generate a load-multiple instruction. COUNT is the number of loads in
13888 the instruction; REGS and MEMS are arrays containing the operands.
13889 BASEREG is the base register to be used in addressing the memory operands.
13890 WBACK_OFFSET is nonzero if the instruction should update the base
13891 register. */
13893 static rtx
13894 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13895 HOST_WIDE_INT wback_offset)
13897 int i = 0, j;
13898 rtx result;
13900 if (!multiple_operation_profitable_p (false, count, 0))
13902 rtx seq;
13904 start_sequence ();
13906 for (i = 0; i < count; i++)
13907 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13909 if (wback_offset != 0)
13910 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13912 seq = get_insns ();
13913 end_sequence ();
13915 return seq;
13918 result = gen_rtx_PARALLEL (VOIDmode,
13919 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13920 if (wback_offset != 0)
13922 XVECEXP (result, 0, 0)
13923 = gen_rtx_SET (VOIDmode, basereg,
13924 plus_constant (Pmode, basereg, wback_offset));
13925 i = 1;
13926 count++;
13929 for (j = 0; i < count; i++, j++)
13930 XVECEXP (result, 0, i)
13931 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13933 return result;
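/* Sketch of the result (editorial note): for COUNT == 2, REGS == { 4, 5 }
   and a write-back offset of 8, the PARALLEL built above has the shape
       (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                  (set (reg:SI 4) (mem ...))
                  (set (reg:SI 5) (mem ...))])
   which is later matched by a load-multiple pattern in the machine
   description.  */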
13936 /* Generate a store-multiple instruction. COUNT is the number of stores in
13937 the instruction; REGS and MEMS are arrays containing the operands.
13938 BASEREG is the base register to be used in addressing the memory operands.
13939 WBACK_OFFSET is nonzero if the instruction should update the base
13940 register. */
13942 static rtx
13943 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13944 HOST_WIDE_INT wback_offset)
13946 int i = 0, j;
13947 rtx result;
13949 if (GET_CODE (basereg) == PLUS)
13950 basereg = XEXP (basereg, 0);
13952 if (!multiple_operation_profitable_p (false, count, 0))
13954 rtx seq;
13956 start_sequence ();
13958 for (i = 0; i < count; i++)
13959 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13961 if (wback_offset != 0)
13962 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13964 seq = get_insns ();
13965 end_sequence ();
13967 return seq;
13970 result = gen_rtx_PARALLEL (VOIDmode,
13971 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13972 if (wback_offset != 0)
13974 XVECEXP (result, 0, 0)
13975 = gen_rtx_SET (VOIDmode, basereg,
13976 plus_constant (Pmode, basereg, wback_offset));
13977 i = 1;
13978 count++;
13981 for (j = 0; i < count; i++, j++)
13982 XVECEXP (result, 0, i)
13983 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13985 return result;
13988 /* Generate either a load-multiple or a store-multiple instruction. This
13989 function can be used in situations where we can start with a single MEM
13990 rtx and adjust its address upwards.
13991 COUNT is the number of operations in the instruction, not counting a
13992 possible update of the base register. REGS is an array containing the
13993 register operands.
13994 BASEREG is the base register to be used in addressing the memory operands,
13995 which are constructed from BASEMEM.
13996 WRITE_BACK specifies whether the generated instruction should include an
13997 update of the base register.
13998 OFFSETP is used to pass an offset to and from this function; this offset
13999 is not used when constructing the address (instead BASEMEM should have an
14000 appropriate offset in its address), it is used only for setting
14001 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14003 static rtx
14004 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14005 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14007 rtx mems[MAX_LDM_STM_OPS];
14008 HOST_WIDE_INT offset = *offsetp;
14009 int i;
14011 gcc_assert (count <= MAX_LDM_STM_OPS);
14013 if (GET_CODE (basereg) == PLUS)
14014 basereg = XEXP (basereg, 0);
14016 for (i = 0; i < count; i++)
14018 rtx addr = plus_constant (Pmode, basereg, i * 4);
14019 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14020 offset += 4;
14023 if (write_back)
14024 *offsetp = offset;
14026 if (is_load)
14027 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14028 write_back ? 4 * count : 0);
14029 else
14030 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14031 write_back ? 4 * count : 0);
14035 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14036 rtx basemem, HOST_WIDE_INT *offsetp)
14038 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14039 offsetp);
14043 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14044 rtx basemem, HOST_WIDE_INT *offsetp)
14046 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14047 offsetp);
14050 /* Called from a peephole2 expander to turn a sequence of loads into an
14051 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14052 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14053 is true if we can reorder the registers because they are used commutatively
14054 subsequently.
14055 Returns true iff we could generate a new instruction. */
14057 bool
14058 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14060 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14061 rtx mems[MAX_LDM_STM_OPS];
14062 int i, j, base_reg;
14063 rtx base_reg_rtx;
14064 HOST_WIDE_INT offset;
14065 int write_back = FALSE;
14066 int ldm_case;
14067 rtx addr;
14069 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14070 &base_reg, &offset, !sort_regs);
14072 if (ldm_case == 0)
14073 return false;
14075 if (sort_regs)
14076 for (i = 0; i < nops - 1; i++)
14077 for (j = i + 1; j < nops; j++)
14078 if (regs[i] > regs[j])
14080 int t = regs[i];
14081 regs[i] = regs[j];
14082 regs[j] = t;
14084 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14086 if (TARGET_THUMB1)
14088 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14089 gcc_assert (ldm_case == 1 || ldm_case == 5);
14090 write_back = TRUE;
14093 if (ldm_case == 5)
14095 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14096 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14097 offset = 0;
14098 if (!TARGET_THUMB1)
14100 base_reg = regs[0];
14101 base_reg_rtx = newbase;
14105 for (i = 0; i < nops; i++)
14107 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14108 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14109 SImode, addr, 0);
14111 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14112 write_back ? offset + i * 4 : 0));
14113 return true;
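/* Example transformation (editorial note): a peephole2 match of
       ldr r1, [r3, #4]
       ldr r0, [r3]
   with SORT_REGS true is reordered by the bubble sort above and emitted as
       ldmia r3, {r0, r1}
   (on Thumb-1 the base register must be dead and the write-back form
   ldmia r3!, {r0, r1} is generated instead).  */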
14116 /* Called from a peephole2 expander to turn a sequence of stores into an
14117 STM instruction. OPERANDS are the operands found by the peephole matcher;
14118 NOPS indicates how many separate stores we are trying to combine.
14119 Returns true iff we could generate a new instruction. */
14121 bool
14122 gen_stm_seq (rtx *operands, int nops)
14124 int i;
14125 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14126 rtx mems[MAX_LDM_STM_OPS];
14127 int base_reg;
14128 rtx base_reg_rtx;
14129 HOST_WIDE_INT offset;
14130 int write_back = FALSE;
14131 int stm_case;
14132 rtx addr;
14133 bool base_reg_dies;
14135 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14136 mem_order, &base_reg, &offset, true);
14138 if (stm_case == 0)
14139 return false;
14141 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14143 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14144 if (TARGET_THUMB1)
14146 gcc_assert (base_reg_dies);
14147 write_back = TRUE;
14150 if (stm_case == 5)
14152 gcc_assert (base_reg_dies);
14153 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14154 offset = 0;
14157 addr = plus_constant (Pmode, base_reg_rtx, offset);
14159 for (i = 0; i < nops; i++)
14161 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14162 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14163 SImode, addr, 0);
14165 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14166 write_back ? offset + i * 4 : 0));
14167 return true;
14170 /* Called from a peephole2 expander to turn a sequence of stores that are
14171 preceded by constant loads into an STM instruction. OPERANDS are the
14172 operands found by the peephole matcher; NOPS indicates how many
14173 separate stores we are trying to combine; there are 2 * NOPS
14174 instructions in the peephole.
14175 Returns true iff we could generate a new instruction. */
14177 bool
14178 gen_const_stm_seq (rtx *operands, int nops)
14180 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14181 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14182 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14183 rtx mems[MAX_LDM_STM_OPS];
14184 int base_reg;
14185 rtx base_reg_rtx;
14186 HOST_WIDE_INT offset;
14187 int write_back = FALSE;
14188 int stm_case;
14189 rtx addr;
14190 bool base_reg_dies;
14191 int i, j;
14192 HARD_REG_SET allocated;
14194 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14195 mem_order, &base_reg, &offset, false);
14197 if (stm_case == 0)
14198 return false;
14200 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14202 /* If the same register is used more than once, try to find a free
14203 register. */
14204 CLEAR_HARD_REG_SET (allocated);
14205 for (i = 0; i < nops; i++)
14207 for (j = i + 1; j < nops; j++)
14208 if (regs[i] == regs[j])
14210 rtx t = peep2_find_free_register (0, nops * 2,
14211 TARGET_THUMB1 ? "l" : "r",
14212 SImode, &allocated);
14213 if (t == NULL_RTX)
14214 return false;
14215 reg_rtxs[i] = t;
14216 regs[i] = REGNO (t);
14220 /* Compute an ordering that maps the register numbers to an ascending
14221 sequence. */
14222 reg_order[0] = 0;
14223 for (i = 0; i < nops; i++)
14224 if (regs[i] < regs[reg_order[0]])
14225 reg_order[0] = i;
14227 for (i = 1; i < nops; i++)
14229 int this_order = reg_order[i - 1];
14230 for (j = 0; j < nops; j++)
14231 if (regs[j] > regs[reg_order[i - 1]]
14232 && (this_order == reg_order[i - 1]
14233 || regs[j] < regs[this_order]))
14234 this_order = j;
14235 reg_order[i] = this_order;
14238 /* Ensure that registers that must be live after the instruction end
14239 up with the correct value. */
14240 for (i = 0; i < nops; i++)
14242 int this_order = reg_order[i];
14243 if ((this_order != mem_order[i]
14244 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14245 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14246 return false;
14249 /* Load the constants. */
14250 for (i = 0; i < nops; i++)
14252 rtx op = operands[2 * nops + mem_order[i]];
14253 sorted_regs[i] = regs[reg_order[i]];
14254 emit_move_insn (reg_rtxs[reg_order[i]], op);
14257 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14259 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14260 if (TARGET_THUMB1)
14262 gcc_assert (base_reg_dies);
14263 write_back = TRUE;
14266 if (stm_case == 5)
14268 gcc_assert (base_reg_dies);
14269 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14270 offset = 0;
14273 addr = plus_constant (Pmode, base_reg_rtx, offset);
14275 for (i = 0; i < nops; i++)
14277 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14278 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14279 SImode, addr, 0);
14281 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14282 write_back ? offset + i * 4 : 0));
14283 return true;
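/* Example transformation (editorial note): for a matched sequence like
       mov r0, #10 ; str r0, [r4]
       mov r0, #20 ; str r0, [r4, #4]
   the loop above notices that r0 is reused and asks
   peep2_find_free_register for a spare register (say r1), so the emitted
   replacement is
       mov r0, #10
       mov r1, #20
       stmia r4, {r0, r1}  */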
14286 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14287 unaligned copies on processors which support unaligned semantics for those
14288 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14289 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14290 An interleave factor of 1 (the minimum) will perform no interleaving.
14291 Load/store multiple are used for aligned addresses where possible. */
14293 static void
14294 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14295 HOST_WIDE_INT length,
14296 unsigned int interleave_factor)
14298 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14299 int *regnos = XALLOCAVEC (int, interleave_factor);
14300 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14301 HOST_WIDE_INT i, j;
14302 HOST_WIDE_INT remaining = length, words;
14303 rtx halfword_tmp = NULL, byte_tmp = NULL;
14304 rtx dst, src;
14305 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14306 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14307 HOST_WIDE_INT srcoffset, dstoffset;
14308 HOST_WIDE_INT src_autoinc, dst_autoinc;
14309 rtx mem, addr;
14311 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14313 /* Use hard registers if we have aligned source or destination so we can use
14314 load/store multiple with contiguous registers. */
14315 if (dst_aligned || src_aligned)
14316 for (i = 0; i < interleave_factor; i++)
14317 regs[i] = gen_rtx_REG (SImode, i);
14318 else
14319 for (i = 0; i < interleave_factor; i++)
14320 regs[i] = gen_reg_rtx (SImode);
14322 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14323 src = copy_addr_to_reg (XEXP (srcbase, 0));
14325 srcoffset = dstoffset = 0;
14327 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14328 For copying the last bytes we want to subtract this offset again. */
14329 src_autoinc = dst_autoinc = 0;
14331 for (i = 0; i < interleave_factor; i++)
14332 regnos[i] = i;
14334 /* Copy BLOCK_SIZE_BYTES chunks. */
14336 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14338 /* Load words. */
14339 if (src_aligned && interleave_factor > 1)
14341 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14342 TRUE, srcbase, &srcoffset));
14343 src_autoinc += UNITS_PER_WORD * interleave_factor;
14345 else
14347 for (j = 0; j < interleave_factor; j++)
14349 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14350 - src_autoinc));
14351 mem = adjust_automodify_address (srcbase, SImode, addr,
14352 srcoffset + j * UNITS_PER_WORD);
14353 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14355 srcoffset += block_size_bytes;
14358 /* Store words. */
14359 if (dst_aligned && interleave_factor > 1)
14361 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14362 TRUE, dstbase, &dstoffset));
14363 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14365 else
14367 for (j = 0; j < interleave_factor; j++)
14369 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14370 - dst_autoinc));
14371 mem = adjust_automodify_address (dstbase, SImode, addr,
14372 dstoffset + j * UNITS_PER_WORD);
14373 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14375 dstoffset += block_size_bytes;
14378 remaining -= block_size_bytes;
14381 /* Copy any whole words left (note these aren't interleaved with any
14382 subsequent halfword/byte load/stores in the interests of simplicity). */
14384 words = remaining / UNITS_PER_WORD;
14386 gcc_assert (words < interleave_factor);
14388 if (src_aligned && words > 1)
14390 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14391 &srcoffset));
14392 src_autoinc += UNITS_PER_WORD * words;
14394 else
14396 for (j = 0; j < words; j++)
14398 addr = plus_constant (Pmode, src,
14399 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14400 mem = adjust_automodify_address (srcbase, SImode, addr,
14401 srcoffset + j * UNITS_PER_WORD);
14402 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14404 srcoffset += words * UNITS_PER_WORD;
14407 if (dst_aligned && words > 1)
14409 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14410 &dstoffset));
14411 dst_autoinc += words * UNITS_PER_WORD;
14413 else
14415 for (j = 0; j < words; j++)
14417 addr = plus_constant (Pmode, dst,
14418 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14419 mem = adjust_automodify_address (dstbase, SImode, addr,
14420 dstoffset + j * UNITS_PER_WORD);
14421 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14423 dstoffset += words * UNITS_PER_WORD;
14426 remaining -= words * UNITS_PER_WORD;
14428 gcc_assert (remaining < 4);
14430 /* Copy a halfword if necessary. */
14432 if (remaining >= 2)
14434 halfword_tmp = gen_reg_rtx (SImode);
14436 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14437 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14438 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14440 /* Either write out immediately, or delay until we've loaded the last
14441 byte, depending on interleave factor. */
14442 if (interleave_factor == 1)
14444 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14445 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14446 emit_insn (gen_unaligned_storehi (mem,
14447 gen_lowpart (HImode, halfword_tmp)));
14448 halfword_tmp = NULL;
14449 dstoffset += 2;
14452 remaining -= 2;
14453 srcoffset += 2;
14456 gcc_assert (remaining < 2);
14458 /* Copy last byte. */
14460 if ((remaining & 1) != 0)
14462 byte_tmp = gen_reg_rtx (SImode);
14464 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14465 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14466 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14468 if (interleave_factor == 1)
14470 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14471 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14472 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14473 byte_tmp = NULL;
14474 dstoffset++;
14477 remaining--;
14478 srcoffset++;
14481 /* Store last halfword if we haven't done so already. */
14483 if (halfword_tmp)
14485 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14486 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14487 emit_insn (gen_unaligned_storehi (mem,
14488 gen_lowpart (HImode, halfword_tmp)));
14489 dstoffset += 2;
14492 /* Likewise for last byte. */
14494 if (byte_tmp)
14496 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14497 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14498 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14499 dstoffset++;
14502 gcc_assert (remaining == 0 && srcoffset == dstoffset);
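/* Illustration (editorial note): with INTERLEAVE_FACTOR == 2, a word-aligned
   source and an unaligned destination, each 8-byte chunk above is emitted
   roughly as
       ldmia  rS!, {r0, r1}   @ aligned source, base updated by write-back
       str    r0, [rD]        @ unaligned stores at explicit offsets
       str    r1, [rD, #4]
   so both loads can be scheduled ahead of the stores; the trailing halfword
   and byte copies then mop up any remainder smaller than a word.  */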
14505 /* From mips_adjust_block_mem:
14507 Helper function for doing a loop-based block operation on memory
14508 reference MEM. Each iteration of the loop will operate on LENGTH
14509 bytes of MEM.
14511 Create a new base register for use within the loop and point it to
14512 the start of MEM. Create a new memory reference that uses this
14513 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14515 static void
14516 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14517 rtx *loop_mem)
14519 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14521 /* Although the new mem does not refer to a known location,
14522 it does keep up to LENGTH bytes of alignment. */
14523 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14524 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14527 /* From mips_block_move_loop:
14529 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14530 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14531 the memory regions do not overlap. */
14533 static void
14534 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14535 unsigned int interleave_factor,
14536 HOST_WIDE_INT bytes_per_iter)
14538 rtx src_reg, dest_reg, final_src, test;
14539 HOST_WIDE_INT leftover;
14541 leftover = length % bytes_per_iter;
14542 length -= leftover;
14544 /* Create registers and memory references for use within the loop. */
14545 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14546 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14548 /* Calculate the value that SRC_REG should have after the last iteration of
14549 the loop. */
14550 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14551 0, 0, OPTAB_WIDEN);
14553 /* Emit the start of the loop. */
14554 rtx_code_label *label = gen_label_rtx ();
14555 emit_label (label);
14557 /* Emit the loop body. */
14558 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14559 interleave_factor);
14561 /* Move on to the next block. */
14562 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14563 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14565 /* Emit the loop condition. */
14566 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14567 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14569 /* Mop up any left-over bytes. */
14570 if (leftover)
14571 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
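/* Worked example (editorial note): for LENGTH == 40 and BYTES_PER_ITER == 16
   the code above emits a loop that runs twice (copying 16 bytes per pass),
   with the exit test comparing the running source pointer against its
   precomputed final value, and then a straight-line copy of the remaining
   8 bytes.  */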
14574 /* Emit a block move when either the source or destination is unaligned (not
14575 aligned to a four-byte boundary). This may need further tuning depending on
14576 core type, optimize_size setting, etc. */
14578 static int
14579 arm_movmemqi_unaligned (rtx *operands)
14581 HOST_WIDE_INT length = INTVAL (operands[2]);
14583 if (optimize_size)
14585 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14586 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14587 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14588 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14589 or dst_aligned though: allow more interleaving in those cases since the
14590 resulting code can be smaller. */
14591 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14592 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14594 if (length > 12)
14595 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14596 interleave_factor, bytes_per_iter);
14597 else
14598 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14599 interleave_factor);
14601 else
14603 /* Note that the loop created by arm_block_move_unaligned_loop may be
14604 subject to loop unrolling, which makes tuning this condition a little
14605 redundant. */
14606 if (length > 32)
14607 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14608 else
14609 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14612 return 1;
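/* Threshold summary (editorial note): when optimizing for size, copies of
   more than 12 bytes use the loop above, moving 8 bytes per iteration if
   either buffer is word-aligned and 4 otherwise; at other optimization
   levels the loop is used only beyond 32 bytes, with an interleave factor
   of 4 and 16 bytes per iteration, and shorter copies are emitted as
   straight-line code.  */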
14616 arm_gen_movmemqi (rtx *operands)
14618 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14619 HOST_WIDE_INT srcoffset, dstoffset;
14620 int i;
14621 rtx src, dst, srcbase, dstbase;
14622 rtx part_bytes_reg = NULL;
14623 rtx mem;
14625 if (!CONST_INT_P (operands[2])
14626 || !CONST_INT_P (operands[3])
14627 || INTVAL (operands[2]) > 64)
14628 return 0;
14630 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14631 return arm_movmemqi_unaligned (operands);
14633 if (INTVAL (operands[3]) & 3)
14634 return 0;
14636 dstbase = operands[0];
14637 srcbase = operands[1];
14639 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14640 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14642 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14643 out_words_to_go = INTVAL (operands[2]) / 4;
14644 last_bytes = INTVAL (operands[2]) & 3;
14645 dstoffset = srcoffset = 0;
14647 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14648 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14650 for (i = 0; in_words_to_go >= 2; i+=4)
14652 if (in_words_to_go > 4)
14653 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14654 TRUE, srcbase, &srcoffset));
14655 else
14656 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14657 src, FALSE, srcbase,
14658 &srcoffset));
14660 if (out_words_to_go)
14662 if (out_words_to_go > 4)
14663 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14664 TRUE, dstbase, &dstoffset));
14665 else if (out_words_to_go != 1)
14666 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14667 out_words_to_go, dst,
14668 (last_bytes == 0
14669 ? FALSE : TRUE),
14670 dstbase, &dstoffset));
14671 else
14673 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14674 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14675 if (last_bytes != 0)
14677 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14678 dstoffset += 4;
14683 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14684 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14687 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14688 if (out_words_to_go)
14690 rtx sreg;
14692 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14693 sreg = copy_to_reg (mem);
14695 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14696 emit_move_insn (mem, sreg);
14697 in_words_to_go--;
14699 gcc_assert (!in_words_to_go); /* Sanity check */
14702 if (in_words_to_go)
14704 gcc_assert (in_words_to_go > 0);
14706 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14707 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14710 gcc_assert (!last_bytes || part_bytes_reg);
14712 if (BYTES_BIG_ENDIAN && last_bytes)
14714 rtx tmp = gen_reg_rtx (SImode);
14716 /* The bytes we want are in the top end of the word. */
14717 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14718 GEN_INT (8 * (4 - last_bytes))));
14719 part_bytes_reg = tmp;
14721 while (last_bytes)
14723 mem = adjust_automodify_address (dstbase, QImode,
14724 plus_constant (Pmode, dst,
14725 last_bytes - 1),
14726 dstoffset + last_bytes - 1);
14727 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14729 if (--last_bytes)
14731 tmp = gen_reg_rtx (SImode);
14732 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14733 part_bytes_reg = tmp;
14738 else
14740 if (last_bytes > 1)
14742 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14743 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14744 last_bytes -= 2;
14745 if (last_bytes)
14747 rtx tmp = gen_reg_rtx (SImode);
14748 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14749 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14750 part_bytes_reg = tmp;
14751 dstoffset += 2;
14755 if (last_bytes)
14757 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14758 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14762 return 1;
14765 /* Helper for gen_movmem_ldrd_strd. Return a new memory reference whose
14766 address is that of MEM advanced by the size of MEM's mode. */
14767 inline static rtx
14768 next_consecutive_mem (rtx mem)
14770 machine_mode mode = GET_MODE (mem);
14771 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14772 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14774 return adjust_automodify_address (mem, mode, addr, offset);
14777 /* Copy using LDRD/STRD instructions whenever possible.
14778 Returns true upon success. */
14779 bool
14780 gen_movmem_ldrd_strd (rtx *operands)
14782 unsigned HOST_WIDE_INT len;
14783 HOST_WIDE_INT align;
14784 rtx src, dst, base;
14785 rtx reg0;
14786 bool src_aligned, dst_aligned;
14787 bool src_volatile, dst_volatile;
14789 gcc_assert (CONST_INT_P (operands[2]));
14790 gcc_assert (CONST_INT_P (operands[3]));
14792 len = UINTVAL (operands[2]);
14793 if (len > 64)
14794 return false;
14796 /* Maximum alignment we can assume for both src and dst buffers. */
14797 align = INTVAL (operands[3]);
14799 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14800 return false;
14802 /* Place src and dst addresses in registers
14803 and update the corresponding mem rtx. */
14804 dst = operands[0];
14805 dst_volatile = MEM_VOLATILE_P (dst);
14806 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14807 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14808 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14810 src = operands[1];
14811 src_volatile = MEM_VOLATILE_P (src);
14812 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14813 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14814 src = adjust_automodify_address (src, VOIDmode, base, 0);
14816 if (!unaligned_access && !(src_aligned && dst_aligned))
14817 return false;
14819 if (src_volatile || dst_volatile)
14820 return false;
14822 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14823 if (!(dst_aligned || src_aligned))
14824 return arm_gen_movmemqi (operands);
14826 src = adjust_address (src, DImode, 0);
14827 dst = adjust_address (dst, DImode, 0);
14828 while (len >= 8)
14830 len -= 8;
14831 reg0 = gen_reg_rtx (DImode);
14832 if (src_aligned)
14833 emit_move_insn (reg0, src);
14834 else
14835 emit_insn (gen_unaligned_loaddi (reg0, src));
14837 if (dst_aligned)
14838 emit_move_insn (dst, reg0);
14839 else
14840 emit_insn (gen_unaligned_storedi (dst, reg0));
14842 src = next_consecutive_mem (src);
14843 dst = next_consecutive_mem (dst);
14846 gcc_assert (len < 8);
14847 if (len >= 4)
14849 /* At least a word but less than a double-word to copy. Copy a word. */
14850 reg0 = gen_reg_rtx (SImode);
14851 src = adjust_address (src, SImode, 0);
14852 dst = adjust_address (dst, SImode, 0);
14853 if (src_aligned)
14854 emit_move_insn (reg0, src);
14855 else
14856 emit_insn (gen_unaligned_loadsi (reg0, src));
14858 if (dst_aligned)
14859 emit_move_insn (dst, reg0);
14860 else
14861 emit_insn (gen_unaligned_storesi (dst, reg0));
14863 src = next_consecutive_mem (src);
14864 dst = next_consecutive_mem (dst);
14865 len -= 4;
14868 if (len == 0)
14869 return true;
14871 /* Copy the remaining bytes. */
14872 if (len >= 2)
14874 dst = adjust_address (dst, HImode, 0);
14875 src = adjust_address (src, HImode, 0);
14876 reg0 = gen_reg_rtx (SImode);
14877 if (src_aligned)
14878 emit_insn (gen_zero_extendhisi2 (reg0, src));
14879 else
14880 emit_insn (gen_unaligned_loadhiu (reg0, src));
14882 if (dst_aligned)
14883 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14884 else
14885 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14887 src = next_consecutive_mem (src);
14888 dst = next_consecutive_mem (dst);
14889 if (len == 2)
14890 return true;
14893 dst = adjust_address (dst, QImode, 0);
14894 src = adjust_address (src, QImode, 0);
14895 reg0 = gen_reg_rtx (QImode);
14896 emit_move_insn (reg0, src);
14897 emit_move_insn (dst, reg0);
14898 return true;
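/* Worked example (editorial note): a 15-byte copy with both buffers
   word-aligned is emitted by the code above as one DImode move (ldrd/strd),
   one SImode move (ldr/str), one zero-extended HImode move (ldrh/strh) and
   a final QImode byte move, i.e. 8 + 4 + 2 + 1 bytes.  */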
14901 /* Select a dominance comparison mode if possible for a test of the general
14902 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14903 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14904 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14905 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14906 In all cases OP will be either EQ or NE, but we don't need to know which
14907 here. If we are unable to support a dominance comparison we return
14908 CC mode. This will then fail to match for the RTL expressions that
14909 generate this call. */
14910 machine_mode
14911 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14913 enum rtx_code cond1, cond2;
14914 int swapped = 0;
14916 /* Currently we will probably get the wrong result if the individual
14917 comparisons are not simple. This also ensures that it is safe to
14918 reverse a comparison if necessary. */
14919 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14920 != CCmode)
14921 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14922 != CCmode))
14923 return CCmode;
14925 /* The if_then_else variant of this tests the second condition if the
14926 first passes, but is true if the first fails. Reverse the first
14927 condition to get a true "inclusive-or" expression. */
14928 if (cond_or == DOM_CC_NX_OR_Y)
14929 cond1 = reverse_condition (cond1);
14931 /* If the comparisons are not equal, and one doesn't dominate the other,
14932 then we can't do this. */
14933 if (cond1 != cond2
14934 && !comparison_dominates_p (cond1, cond2)
14935 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14936 return CCmode;
14938 if (swapped)
14939 std::swap (cond1, cond2);
14941 switch (cond1)
14943 case EQ:
14944 if (cond_or == DOM_CC_X_AND_Y)
14945 return CC_DEQmode;
14947 switch (cond2)
14949 case EQ: return CC_DEQmode;
14950 case LE: return CC_DLEmode;
14951 case LEU: return CC_DLEUmode;
14952 case GE: return CC_DGEmode;
14953 case GEU: return CC_DGEUmode;
14954 default: gcc_unreachable ();
14957 case LT:
14958 if (cond_or == DOM_CC_X_AND_Y)
14959 return CC_DLTmode;
14961 switch (cond2)
14963 case LT:
14964 return CC_DLTmode;
14965 case LE:
14966 return CC_DLEmode;
14967 case NE:
14968 return CC_DNEmode;
14969 default:
14970 gcc_unreachable ();
14973 case GT:
14974 if (cond_or == DOM_CC_X_AND_Y)
14975 return CC_DGTmode;
14977 switch (cond2)
14979 case GT:
14980 return CC_DGTmode;
14981 case GE:
14982 return CC_DGEmode;
14983 case NE:
14984 return CC_DNEmode;
14985 default:
14986 gcc_unreachable ();
14989 case LTU:
14990 if (cond_or == DOM_CC_X_AND_Y)
14991 return CC_DLTUmode;
14993 switch (cond2)
14995 case LTU:
14996 return CC_DLTUmode;
14997 case LEU:
14998 return CC_DLEUmode;
14999 case NE:
15000 return CC_DNEmode;
15001 default:
15002 gcc_unreachable ();
15005 case GTU:
15006 if (cond_or == DOM_CC_X_AND_Y)
15007 return CC_DGTUmode;
15009 switch (cond2)
15011 case GTU:
15012 return CC_DGTUmode;
15013 case GEU:
15014 return CC_DGEUmode;
15015 case NE:
15016 return CC_DNEmode;
15017 default:
15018 gcc_unreachable ();
15021 /* The remaining cases only occur when both comparisons are the
15022 same. */
15023 case NE:
15024 gcc_assert (cond1 == cond2);
15025 return CC_DNEmode;
15027 case LE:
15028 gcc_assert (cond1 == cond2);
15029 return CC_DLEmode;
15031 case GE:
15032 gcc_assert (cond1 == cond2);
15033 return CC_DGEmode;
15035 case LEU:
15036 gcc_assert (cond1 == cond2);
15037 return CC_DLEUmode;
15039 case GEU:
15040 gcc_assert (cond1 == cond2);
15041 return CC_DGEUmode;
15043 default:
15044 gcc_unreachable ();
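/* Example (editorial note): for a test of the form
       (ne (and (eq r0 (const_int 0)) (eq r1 (const_int 0))) (const_int 0))
   i.e. "r0 == 0 && r1 == 0", both sub-comparisons are EQ and COND_OR is
   DOM_CC_X_AND_Y, so the function returns CC_DEQmode; the patterns using
   that mode emit a cmp followed by a conditional cmpeq.  */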
15048 machine_mode
15049 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15051 /* All floating point compares return CCFP if it is an equality
15052 comparison, and CCFPE otherwise. */
15053 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15055 switch (op)
15057 case EQ:
15058 case NE:
15059 case UNORDERED:
15060 case ORDERED:
15061 case UNLT:
15062 case UNLE:
15063 case UNGT:
15064 case UNGE:
15065 case UNEQ:
15066 case LTGT:
15067 return CCFPmode;
15069 case LT:
15070 case LE:
15071 case GT:
15072 case GE:
15073 return CCFPEmode;
15075 default:
15076 gcc_unreachable ();
15080 /* A compare with a shifted operand. Because of canonicalization, the
15081 comparison will have to be swapped when we emit the assembler. */
15082 if (GET_MODE (y) == SImode
15083 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15084 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15085 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15086 || GET_CODE (x) == ROTATERT))
15087 return CC_SWPmode;
15089 /* This operation is performed swapped, but since we only rely on the Z
15090 flag we don't need an additional mode. */
15091 if (GET_MODE (y) == SImode
15092 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15093 && GET_CODE (x) == NEG
15094 && (op == EQ || op == NE))
15095 return CC_Zmode;
15097 /* This is a special case that is used by combine to allow a
15098 comparison of a shifted byte load to be split into a zero-extend
15099 followed by a comparison of the shifted integer (only valid for
15100 equalities and unsigned inequalities). */
15101 if (GET_MODE (x) == SImode
15102 && GET_CODE (x) == ASHIFT
15103 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15104 && GET_CODE (XEXP (x, 0)) == SUBREG
15105 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15106 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15107 && (op == EQ || op == NE
15108 || op == GEU || op == GTU || op == LTU || op == LEU)
15109 && CONST_INT_P (y))
15110 return CC_Zmode;
15112 /* A construct for a conditional compare, if the false arm contains
15113 0, then both conditions must be true, otherwise either condition
15114 must be true. Not all conditions are possible, so CCmode is
15115 returned if it can't be done. */
15116 if (GET_CODE (x) == IF_THEN_ELSE
15117 && (XEXP (x, 2) == const0_rtx
15118 || XEXP (x, 2) == const1_rtx)
15119 && COMPARISON_P (XEXP (x, 0))
15120 && COMPARISON_P (XEXP (x, 1)))
15121 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15122 INTVAL (XEXP (x, 2)));
15124 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15125 if (GET_CODE (x) == AND
15126 && (op == EQ || op == NE)
15127 && COMPARISON_P (XEXP (x, 0))
15128 && COMPARISON_P (XEXP (x, 1)))
15129 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15130 DOM_CC_X_AND_Y);
15132 if (GET_CODE (x) == IOR
15133 && (op == EQ || op == NE)
15134 && COMPARISON_P (XEXP (x, 0))
15135 && COMPARISON_P (XEXP (x, 1)))
15136 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15137 DOM_CC_X_OR_Y);
15139 /* An operation (on Thumb) where we want to test for a single bit.
15140 This is done by shifting that bit up into the top bit of a
15141 scratch register; we can then branch on the sign bit. */
15142 if (TARGET_THUMB1
15143 && GET_MODE (x) == SImode
15144 && (op == EQ || op == NE)
15145 && GET_CODE (x) == ZERO_EXTRACT
15146 && XEXP (x, 1) == const1_rtx)
15147 return CC_Nmode;
15149 /* An operation that sets the condition codes as a side-effect, the
15150 V flag is not set correctly, so we can only use comparisons where
15151 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15152 instead.) */
15153 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15154 if (GET_MODE (x) == SImode
15155 && y == const0_rtx
15156 && (op == EQ || op == NE || op == LT || op == GE)
15157 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15158 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15159 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15160 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15161 || GET_CODE (x) == LSHIFTRT
15162 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15163 || GET_CODE (x) == ROTATERT
15164 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15165 return CC_NOOVmode;
15167 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15168 return CC_Zmode;
15170 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15171 && GET_CODE (x) == PLUS
15172 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15173 return CC_Cmode;
15175 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15177 switch (op)
15179 case EQ:
15180 case NE:
15181 /* A DImode comparison against zero can be implemented by
15182 or'ing the two halves together. */
15183 if (y == const0_rtx)
15184 return CC_Zmode;
15186 /* We can do an equality test in three Thumb instructions. */
15187 if (!TARGET_32BIT)
15188 return CC_Zmode;
15190 /* FALLTHROUGH */
15192 case LTU:
15193 case LEU:
15194 case GTU:
15195 case GEU:
15196 /* DImode unsigned comparisons can be implemented by cmp +
15197 cmpeq without a scratch register. Not worth doing in
15198 Thumb-2. */
15199 if (TARGET_32BIT)
15200 return CC_CZmode;
15202 /* FALLTHROUGH */
15204 case LT:
15205 case LE:
15206 case GT:
15207 case GE:
15208 /* DImode signed and unsigned comparisons can be implemented
15209 by cmp + sbcs with a scratch register, but that does not
15210 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15211 gcc_assert (op != EQ && op != NE);
15212 return CC_NCVmode;
15214 default:
15215 gcc_unreachable ();
15219 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15220 return GET_MODE (x);
15222 return CCmode;
15225 /* X and Y are two things to compare using CODE. Emit the compare insn and
15226 return the rtx for the CC register in the proper mode. SCRATCH, if nonnull,
15227 is a scratch register that may be needed for some DImode comparisons. */
15229 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15231 machine_mode mode;
15232 rtx cc_reg;
15233 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15235 /* We might have X as a constant, Y as a register because of the predicates
15236 used for cmpdi. If so, force X to a register here. */
15237 if (dimode_comparison && !REG_P (x))
15238 x = force_reg (DImode, x);
15240 mode = SELECT_CC_MODE (code, x, y);
15241 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15243 if (dimode_comparison
15244 && mode != CC_CZmode)
15246 rtx clobber, set;
15248 /* To compare two non-zero values for equality, XOR them and
15249 then compare against zero. Not used for ARM mode; there
15250 CC_CZmode is cheaper. */
15251 if (mode == CC_Zmode && y != const0_rtx)
15253 gcc_assert (!reload_completed);
15254 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15255 y = const0_rtx;
15258 /* A scratch register is required. */
15259 if (reload_completed)
15260 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15261 else
15262 scratch = gen_rtx_SCRATCH (SImode);
15264 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15265 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15266 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15268 else
15269 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15271 return cc_reg;
15274 /* Generate a sequence of insns that will generate the correct return
15275 address mask depending on the physical architecture that the program
15276 is running on. */
15278 arm_gen_return_addr_mask (void)
15280 rtx reg = gen_reg_rtx (Pmode);
15282 emit_insn (gen_return_addr_mask (reg));
15283 return reg;
15286 void
15287 arm_reload_in_hi (rtx *operands)
15289 rtx ref = operands[1];
15290 rtx base, scratch;
15291 HOST_WIDE_INT offset = 0;
15293 if (GET_CODE (ref) == SUBREG)
15295 offset = SUBREG_BYTE (ref);
15296 ref = SUBREG_REG (ref);
15299 if (REG_P (ref))
15301 /* We have a pseudo which has been spilt onto the stack; there
15302 are two cases here: the first where there is a simple
15303 stack-slot replacement and a second where the stack-slot is
15304 out of range, or is used as a subreg. */
15305 if (reg_equiv_mem (REGNO (ref)))
15307 ref = reg_equiv_mem (REGNO (ref));
15308 base = find_replacement (&XEXP (ref, 0));
15310 else
15311 /* The slot is out of range, or was dressed up in a SUBREG. */
15312 base = reg_equiv_address (REGNO (ref));
15314 else
15315 base = find_replacement (&XEXP (ref, 0));
15317 /* Handle the case where the address is too complex to be offset by 1. */
15318 if (GET_CODE (base) == MINUS
15319 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15321 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15323 emit_set_insn (base_plus, base);
15324 base = base_plus;
15326 else if (GET_CODE (base) == PLUS)
15328 /* The addend must be CONST_INT, or we would have dealt with it above. */
15329 HOST_WIDE_INT hi, lo;
15331 offset += INTVAL (XEXP (base, 1));
15332 base = XEXP (base, 0);
15334 /* Rework the address into a legal sequence of insns. */
15335 /* Valid range for lo is -4095 -> 4095 */
15336 lo = (offset >= 0
15337 ? (offset & 0xfff)
15338 : -((-offset) & 0xfff));
15340 /* Corner case: if lo is the max offset then we would be out of range
15341 once we have added the additional 1 below, so bump the msb into the
15342 pre-loading insn(s). */
15343 if (lo == 4095)
15344 lo &= 0x7ff;
15346 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15347 ^ (HOST_WIDE_INT) 0x80000000)
15348 - (HOST_WIDE_INT) 0x80000000);
15350 gcc_assert (hi + lo == offset);
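/* Worked example (editorial note): OFFSET == 4200 splits as LO = 4200 & 0xfff
   = 104 and HI = 4096, so the code below emits "add base_plus, base, #4096"
   and then addresses the two bytes at offsets #104 and #105.  For
   OFFSET == 4095 the corner case above keeps only the low 11 bits
   (LO = 2047, HI = 2048) so that LO + 1 still fits in the load offset
   field.  */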
15352 if (hi != 0)
15354 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15356 /* Get the base address; addsi3 knows how to handle constants
15357 that require more than one insn. */
15358 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15359 base = base_plus;
15360 offset = lo;
15364 /* Operands[2] may overlap operands[0] (though it won't overlap
15365 operands[1]), that's why we asked for a DImode reg -- so we can
15366 use the bit that does not overlap. */
15367 if (REGNO (operands[2]) == REGNO (operands[0]))
15368 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15369 else
15370 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15372 emit_insn (gen_zero_extendqisi2 (scratch,
15373 gen_rtx_MEM (QImode,
15374 plus_constant (Pmode, base,
15375 offset))));
15376 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15377 gen_rtx_MEM (QImode,
15378 plus_constant (Pmode, base,
15379 offset + 1))));
15380 if (!BYTES_BIG_ENDIAN)
15381 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15382 gen_rtx_IOR (SImode,
15383 gen_rtx_ASHIFT
15384 (SImode,
15385 gen_rtx_SUBREG (SImode, operands[0], 0),
15386 GEN_INT (8)),
15387 scratch));
15388 else
15389 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15390 gen_rtx_IOR (SImode,
15391 gen_rtx_ASHIFT (SImode, scratch,
15392 GEN_INT (8)),
15393 gen_rtx_SUBREG (SImode, operands[0], 0)));
15396 /* Handle storing a half-word to memory during reload by synthesizing as two
15397 byte stores. Take care not to clobber the input values until after we
15398 have moved them somewhere safe. This code assumes that if the DImode
15399 scratch in operands[2] overlaps either the input value or output address
15400 in some way, then that value must die in this insn (we absolutely need
15401 two scratch registers for some corner cases). */
15402 void
15403 arm_reload_out_hi (rtx *operands)
15405 rtx ref = operands[0];
15406 rtx outval = operands[1];
15407 rtx base, scratch;
15408 HOST_WIDE_INT offset = 0;
15410 if (GET_CODE (ref) == SUBREG)
15412 offset = SUBREG_BYTE (ref);
15413 ref = SUBREG_REG (ref);
15416 if (REG_P (ref))
15418 /* We have a pseudo which has been spilt onto the stack; there
15419 are two cases here: the first where there is a simple
15420 stack-slot replacement and a second where the stack-slot is
15421 out of range, or is used as a subreg. */
15422 if (reg_equiv_mem (REGNO (ref)))
15424 ref = reg_equiv_mem (REGNO (ref));
15425 base = find_replacement (&XEXP (ref, 0));
15427 else
15428 /* The slot is out of range, or was dressed up in a SUBREG. */
15429 base = reg_equiv_address (REGNO (ref));
15431 else
15432 base = find_replacement (&XEXP (ref, 0));
15434 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15436 /* Handle the case where the address is too complex to be offset by 1. */
15437 if (GET_CODE (base) == MINUS
15438 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15440 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15442 /* Be careful not to destroy OUTVAL. */
15443 if (reg_overlap_mentioned_p (base_plus, outval))
15445 /* Updating base_plus might destroy outval, see if we can
15446 swap the scratch and base_plus. */
15447 if (!reg_overlap_mentioned_p (scratch, outval))
15448 std::swap (scratch, base_plus);
15449 else
15451 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15453 /* Be conservative and copy OUTVAL into the scratch now,
15454 this should only be necessary if outval is a subreg
15455 of something larger than a word. */
15456 /* XXX Might this clobber base? I can't see how it can,
15457 since scratch is known to overlap with OUTVAL, and
15458 must be wider than a word. */
15459 emit_insn (gen_movhi (scratch_hi, outval));
15460 outval = scratch_hi;
15464 emit_set_insn (base_plus, base);
15465 base = base_plus;
15467 else if (GET_CODE (base) == PLUS)
15469 /* The addend must be CONST_INT, or we would have dealt with it above. */
15470 HOST_WIDE_INT hi, lo;
15472 offset += INTVAL (XEXP (base, 1));
15473 base = XEXP (base, 0);
15475 /* Rework the address into a legal sequence of insns. */
15476 /* Valid range for lo is -4095 -> 4095 */
15477 lo = (offset >= 0
15478 ? (offset & 0xfff)
15479 : -((-offset) & 0xfff));
15481 /* Corner case, if lo is the max offset then we would be out of range
15482 once we have added the additional 1 below, so bump the msb into the
15483 pre-loading insn(s). */
15484 if (lo == 4095)
15485 lo &= 0x7ff;
15487 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15488 ^ (HOST_WIDE_INT) 0x80000000)
15489 - (HOST_WIDE_INT) 0x80000000);
15491 gcc_assert (hi + lo == offset);
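/* A worked example of the split above (illustrative values, not from the
   sources): for offset == 4100, lo == 4 and hi == 4096, so base_plus is
   loaded with base + 4096 and the two byte stores below use offsets 4
   and 5.  For the corner case offset == 4095, lo is trimmed to 2047 and
   hi becomes 2048, so the later "offset + 1" access (2048) still lies
   inside the +/-4095 addressing range.  */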
15493 if (hi != 0)
15495 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15497 /* Be careful not to destroy OUTVAL. */
15498 if (reg_overlap_mentioned_p (base_plus, outval))
15500 /* Updating base_plus might destroy outval, see if we
15501 can swap the scratch and base_plus. */
15502 if (!reg_overlap_mentioned_p (scratch, outval))
15503 std::swap (scratch, base_plus);
15504 else
15506 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15508 /* Be conservative and copy outval into scratch now,
15509 this should only be necessary if outval is a
15510 subreg of something larger than a word. */
15511 /* XXX Might this clobber base? I can't see how it
15512 can, since scratch is known to overlap with
15513 outval. */
15514 emit_insn (gen_movhi (scratch_hi, outval));
15515 outval = scratch_hi;
15519 /* Get the base address; addsi3 knows how to handle constants
15520 that require more than one insn. */
15521 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15522 base = base_plus;
15523 offset = lo;
15527 if (BYTES_BIG_ENDIAN)
15529 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15530 plus_constant (Pmode, base,
15531 offset + 1)),
15532 gen_lowpart (QImode, outval)));
15533 emit_insn (gen_lshrsi3 (scratch,
15534 gen_rtx_SUBREG (SImode, outval, 0),
15535 GEN_INT (8)));
15536 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15537 offset)),
15538 gen_lowpart (QImode, scratch)));
15540 else
15542 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15543 offset)),
15544 gen_lowpart (QImode, outval)));
15545 emit_insn (gen_lshrsi3 (scratch,
15546 gen_rtx_SUBREG (SImode, outval, 0),
15547 GEN_INT (8)));
15548 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15549 plus_constant (Pmode, base,
15550 offset + 1)),
15551 gen_lowpart (QImode, scratch)));
15555 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15556 (padded to the size of a word) should be passed in a register. */
15558 static bool
15559 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15561 if (TARGET_AAPCS_BASED)
15562 return must_pass_in_stack_var_size (mode, type);
15563 else
15564 return must_pass_in_stack_var_size_or_pad (mode, type);
15568 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15569 Return true if an argument passed on the stack should be padded upwards,
15570 i.e. if the least-significant byte has useful data.
15571 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15572 aggregate types are placed in the lowest memory address. */
15574 bool
15575 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15577 if (!TARGET_AAPCS_BASED)
15578 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15580 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15581 return false;
15583 return true;
15587 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15588 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15589 register has useful data, and return the opposite if the most
15590 significant byte does. */
15592 bool
15593 arm_pad_reg_upward (machine_mode mode,
15594 tree type, int first ATTRIBUTE_UNUSED)
15596 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15598 /* For AAPCS, small aggregates, small fixed-point types,
15599 and small complex types are always padded upwards. */
15600 if (type)
15602 if ((AGGREGATE_TYPE_P (type)
15603 || TREE_CODE (type) == COMPLEX_TYPE
15604 || FIXED_POINT_TYPE_P (type))
15605 && int_size_in_bytes (type) <= 4)
15606 return true;
15608 else
15610 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15611 && GET_MODE_SIZE (mode) <= 4)
15612 return true;
15616 /* Otherwise, use default padding. */
15617 return !BYTES_BIG_ENDIAN;
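/* For illustration (derived from the checks above): on a big-endian AAPCS
   target a 3-byte aggregate is padded upwards by arm_pad_reg_upward,
   while a plain int falls through to the default and is padded
   downwards; on a little-endian target the function always returns
   true.  */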
15620 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15621 assuming that the address in the base register is word aligned. */
15622 bool
15623 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15625 HOST_WIDE_INT max_offset;
15627 /* Offset must be a multiple of 4 in Thumb mode. */
15628 if (TARGET_THUMB2 && ((offset & 3) != 0))
15629 return false;
15631 if (TARGET_THUMB2)
15632 max_offset = 1020;
15633 else if (TARGET_ARM)
15634 max_offset = 255;
15635 else
15636 return false;
15638 return ((offset <= max_offset) && (offset >= -max_offset));
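/* Usage sketch (values chosen for illustration):
   offset_ok_for_ldrd_strd (1020) -> true on Thumb-2, false on ARM (> 255);
   offset_ok_for_ldrd_strd (1022) -> false on Thumb-2 (not a multiple of 4);
   offset_ok_for_ldrd_strd (-252) -> true on both.  */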
15641 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15642 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15643 Assumes that the address in the base register RN is word aligned. Pattern
15644 guarantees that both memory accesses use the same base register,
15645 the offsets are constants within the range, and the gap between the offsets is 4.
15646 If reload is complete then check that the registers are legal. WBACK indicates whether
15647 address is updated. LOAD indicates whether memory access is load or store. */
15648 bool
15649 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15650 bool wback, bool load)
15652 unsigned int t, t2, n;
15654 if (!reload_completed)
15655 return true;
15657 if (!offset_ok_for_ldrd_strd (offset))
15658 return false;
15660 t = REGNO (rt);
15661 t2 = REGNO (rt2);
15662 n = REGNO (rn);
15664 if ((TARGET_THUMB2)
15665 && ((wback && (n == t || n == t2))
15666 || (t == SP_REGNUM)
15667 || (t == PC_REGNUM)
15668 || (t2 == SP_REGNUM)
15669 || (t2 == PC_REGNUM)
15670 || (!load && (n == PC_REGNUM))
15671 || (load && (t == t2))
15672 /* Triggers Cortex-M3 LDRD errata. */
15673 || (!wback && load && fix_cm3_ldrd && (n == t))))
15674 return false;
15676 if ((TARGET_ARM)
15677 && ((wback && (n == t || n == t2))
15678 || (t2 == PC_REGNUM)
15679 || (t % 2 != 0) /* First destination register is not even. */
15680 || (t2 != t + 1)
15681 /* PC can be used as base register (for offset addressing only),
15682 but it is deprecated. */
15683 || (n == PC_REGNUM)))
15684 return false;
15686 return true;
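/* For example (illustrative register choices): in ARM state r4/r5 form a
   valid LDRD/STRD pair, whereas r5/r6 (odd first register) or r4/r6 (not
   consecutive) do not; in Thumb-2 state almost any pair is accepted as
   long as SP and PC are not involved and the writeback/errata checks
   above pass.  */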
15689 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15690 operand MEM's address contains an immediate offset from the base
15691 register and has no side effects, in which case it sets BASE and
15692 OFFSET accordingly. */
15693 static bool
15694 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15696 rtx addr;
15698 gcc_assert (base != NULL && offset != NULL);
15700 /* TODO: Handle more general memory operand patterns, such as
15701 PRE_DEC and PRE_INC. */
15703 if (side_effects_p (mem))
15704 return false;
15706 /* Can't deal with subregs. */
15707 if (GET_CODE (mem) == SUBREG)
15708 return false;
15710 gcc_assert (MEM_P (mem));
15712 *offset = const0_rtx;
15714 addr = XEXP (mem, 0);
15716 /* If addr isn't valid for DImode, then we can't handle it. */
15717 if (!arm_legitimate_address_p (DImode, addr,
15718 reload_in_progress || reload_completed))
15719 return false;
15721 if (REG_P (addr))
15723 *base = addr;
15724 return true;
15726 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15728 *base = XEXP (addr, 0);
15729 *offset = XEXP (addr, 1);
15730 return (REG_P (*base) && CONST_INT_P (*offset));
15733 return false;
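/* For example (illustrative): an address of the form (reg:SI r4) yields
   *base == r4 and *offset == 0, while (plus:SI (reg:SI r4) (const_int 8))
   yields *base == r4 and *offset == 8; auto-modify addresses and SUBREGs
   are rejected above.  */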
15736 /* Called from a peephole2 to replace two word-size accesses with a
15737 single LDRD/STRD instruction. Returns true iff we can generate a
15738 new instruction sequence. That is, both accesses use the same base
15739 register and the gap between constant offsets is 4. This function
15740 may reorder its operands to match ldrd/strd RTL templates.
15741 OPERANDS are the operands found by the peephole matcher;
15742 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15743 corresponding memory operands. LOAD indicates whether the access
15744 is load or store. CONST_STORE indicates a store of constant
15745 integer values held in OPERANDS[4,5] and assumes that the pattern
15746 is 4 insns long, for the purpose of checking dead registers.
15747 COMMUTE indicates that register operands may be reordered. */
15748 bool
15749 gen_operands_ldrd_strd (rtx *operands, bool load,
15750 bool const_store, bool commute)
15752 int nops = 2;
15753 HOST_WIDE_INT offsets[2], offset;
15754 rtx base = NULL_RTX;
15755 rtx cur_base, cur_offset, tmp;
15756 int i, gap;
15757 HARD_REG_SET regset;
15759 gcc_assert (!const_store || !load);
15760 /* Check that the memory references are immediate offsets from the
15761 same base register. Extract the base register, the destination
15762 registers, and the corresponding memory offsets. */
15763 for (i = 0; i < nops; i++)
15765 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15766 return false;
15768 if (i == 0)
15769 base = cur_base;
15770 else if (REGNO (base) != REGNO (cur_base))
15771 return false;
15773 offsets[i] = INTVAL (cur_offset);
15774 if (GET_CODE (operands[i]) == SUBREG)
15776 tmp = SUBREG_REG (operands[i]);
15777 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15778 operands[i] = tmp;
15782 /* Make sure there is no dependency between the individual loads. */
15783 if (load && REGNO (operands[0]) == REGNO (base))
15784 return false; /* RAW */
15786 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15787 return false; /* WAW */
15789 /* If the same input register is used in both stores
15790 when storing different constants, try to find a free register.
15791 For example, the code
15792 mov r0, 0
15793 str r0, [r2]
15794 mov r0, 1
15795 str r0, [r2, #4]
15796 can be transformed into
15797 mov r1, 0
15798 strd r1, r0, [r2]
15799 in Thumb mode assuming that r1 is free. */
15800 if (const_store
15801 && REGNO (operands[0]) == REGNO (operands[1])
15802 && INTVAL (operands[4]) != INTVAL (operands[5]))
15804 if (TARGET_THUMB2)
15806 CLEAR_HARD_REG_SET (regset);
15807 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15808 if (tmp == NULL_RTX)
15809 return false;
15811 /* Use the new register in the first load to ensure that
15812 if the original input register is not dead after peephole,
15813 then it will have the correct constant value. */
15814 operands[0] = tmp;
15816 else if (TARGET_ARM)
15818 return false;
15819 int regno = REGNO (operands[0]);
15820 if (!peep2_reg_dead_p (4, operands[0]))
15822 /* When the input register is even and is not dead after the
15823 pattern, it has to hold the second constant but we cannot
15824 form a legal STRD in ARM mode with this register as the second
15825 register. */
15826 if (regno % 2 == 0)
15827 return false;
15829 /* Is regno-1 free? */
15830 SET_HARD_REG_SET (regset);
15831 CLEAR_HARD_REG_BIT (regset, regno - 1);
15832 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15833 if (tmp == NULL_RTX)
15834 return false;
15836 operands[0] = tmp;
15838 else
15840 /* Find a DImode register. */
15841 CLEAR_HARD_REG_SET (regset);
15842 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15843 if (tmp != NULL_RTX)
15845 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15846 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15848 else
15850 /* Can we use the input register to form a DI register? */
15851 SET_HARD_REG_SET (regset);
15852 CLEAR_HARD_REG_BIT (regset,
15853 regno % 2 == 0 ? regno + 1 : regno - 1);
15854 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15855 if (tmp == NULL_RTX)
15856 return false;
15857 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15861 gcc_assert (operands[0] != NULL_RTX);
15862 gcc_assert (operands[1] != NULL_RTX);
15863 gcc_assert (REGNO (operands[0]) % 2 == 0);
15864 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15868 /* Make sure the instructions are ordered with lower memory access first. */
15869 if (offsets[0] > offsets[1])
15871 gap = offsets[0] - offsets[1];
15872 offset = offsets[1];
15874 /* Swap the instructions such that lower memory is accessed first. */
15875 std::swap (operands[0], operands[1]);
15876 std::swap (operands[2], operands[3]);
15877 if (const_store)
15878 std::swap (operands[4], operands[5]);
15880 else
15882 gap = offsets[1] - offsets[0];
15883 offset = offsets[0];
15886 /* Make sure accesses are to consecutive memory locations. */
15887 if (gap != 4)
15888 return false;
15890 /* Make sure we generate legal instructions. */
15891 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15892 false, load))
15893 return true;
15895 /* In Thumb state, where registers are almost unconstrained, there
15896 is little hope of fixing it. */
15897 if (TARGET_THUMB2)
15898 return false;
15900 if (load && commute)
15902 /* Try reordering registers. */
15903 std::swap (operands[0], operands[1]);
15904 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15905 false, load))
15906 return true;
15909 if (const_store)
15911 /* If input registers are dead after this pattern, they can be
15912 reordered or replaced by other registers that are free in the
15913 current pattern. */
15914 if (!peep2_reg_dead_p (4, operands[0])
15915 || !peep2_reg_dead_p (4, operands[1]))
15916 return false;
15918 /* Try to reorder the input registers. */
15919 /* For example, the code
15920 mov r0, 0
15921 mov r1, 1
15922 str r1, [r2]
15923 str r0, [r2, #4]
15924 can be transformed into
15925 mov r1, 0
15926 mov r0, 1
15927 strd r0, r1, [r2]
15929 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15930 false, false))
15932 std::swap (operands[0], operands[1]);
15933 return true;
15936 /* Try to find a free DI register. */
15937 CLEAR_HARD_REG_SET (regset);
15938 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15939 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15940 while (true)
15942 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15943 if (tmp == NULL_RTX)
15944 return false;
15946 /* DREG must be an even-numbered register in DImode.
15947 Split it into SI registers. */
15948 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15949 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15950 gcc_assert (operands[0] != NULL_RTX);
15951 gcc_assert (operands[1] != NULL_RTX);
15952 gcc_assert (REGNO (operands[0]) % 2 == 0);
15953 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15955 return (operands_ok_ldrd_strd (operands[0], operands[1],
15956 base, offset,
15957 false, load));
15961 return false;
15967 /* Print a symbolic form of X to the debug file, F. */
15968 static void
15969 arm_print_value (FILE *f, rtx x)
15971 switch (GET_CODE (x))
15973 case CONST_INT:
15974 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15975 return;
15977 case CONST_DOUBLE:
15978 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15979 return;
15981 case CONST_VECTOR:
15983 int i;
15985 fprintf (f, "<");
15986 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15988 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15989 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15990 fputc (',', f);
15992 fprintf (f, ">");
15994 return;
15996 case CONST_STRING:
15997 fprintf (f, "\"%s\"", XSTR (x, 0));
15998 return;
16000 case SYMBOL_REF:
16001 fprintf (f, "`%s'", XSTR (x, 0));
16002 return;
16004 case LABEL_REF:
16005 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16006 return;
16008 case CONST:
16009 arm_print_value (f, XEXP (x, 0));
16010 return;
16012 case PLUS:
16013 arm_print_value (f, XEXP (x, 0));
16014 fprintf (f, "+");
16015 arm_print_value (f, XEXP (x, 1));
16016 return;
16018 case PC:
16019 fprintf (f, "pc");
16020 return;
16022 default:
16023 fprintf (f, "????");
16024 return;
16028 /* Routines for manipulation of the constant pool. */
16030 /* Arm instructions cannot load a large constant directly into a
16031 register; they have to come from a pc relative load. The constant
16032 must therefore be placed in the addressable range of the pc
16033 relative load. Depending on the precise pc relative load
16034 instruction the range is somewhere between 256 bytes and 4k. This
16035 means that we often have to dump a constant inside a function, and
16036 generate code to branch around it.
16038 It is important to minimize this, since the branches will slow
16039 things down and make the code larger.
16041 Normally we can hide the table after an existing unconditional
16042 branch so that there is no interruption of the flow, but in the
16043 worst case the code looks like this:
16045 ldr rn, L1
16047 b L2
16048 align
16049 L1: .long value
16053 ldr rn, L3
16055 b L4
16056 align
16057 L3: .long value
16061 We fix this by performing a scan after scheduling, which notices
16062 which instructions need to have their operands fetched from the
16063 constant table and builds the table.
16065 The algorithm starts by building a table of all the constants that
16066 need fixing up and all the natural barriers in the function (places
16067 where a constant table can be dropped without breaking the flow).
16068 For each fixup we note how far the pc-relative replacement will be
16069 able to reach and the offset of the instruction into the function.
16071 Having built the table we then group the fixes together to form
16072 tables that are as large as possible (subject to addressing
16073 constraints) and emit each table of constants after the last
16074 barrier that is within range of all the instructions in the group.
16075 If a group does not contain a barrier, then we forcibly create one
16076 by inserting a jump instruction into the flow. Once the table has
16077 been inserted, the insns are then modified to reference the
16078 relevant entry in the pool.
16080 Possible enhancements to the algorithm (not implemented) are:
16082 1) For some processors and object formats, there may be benefit in
16083 aligning the pools to the start of cache lines; this alignment
16084 would need to be taken into account when calculating addressability
16085 of a pool. */
16087 /* These typedefs are located at the start of this file, so that
16088 they can be used in the prototypes there. This comment is to
16089 remind readers of that fact so that the following structures
16090 can be understood more easily.
16092 typedef struct minipool_node Mnode;
16093 typedef struct minipool_fixup Mfix; */
16095 struct minipool_node
16097 /* Doubly linked chain of entries. */
16098 Mnode * next;
16099 Mnode * prev;
16100 /* The maximum offset into the code at which this entry can be placed. While
16101 pushing fixes for forward references, all entries are sorted in order
16102 of increasing max_address. */
16103 HOST_WIDE_INT max_address;
16104 /* Similarly for an entry inserted for a backwards ref. */
16105 HOST_WIDE_INT min_address;
16106 /* The number of fixes referencing this entry. This can become zero
16107 if we "unpush" an entry. In this case we ignore the entry when we
16108 come to emit the code. */
16109 int refcount;
16110 /* The offset from the start of the minipool. */
16111 HOST_WIDE_INT offset;
16112 /* The value in table. */
16113 rtx value;
16114 /* The mode of value. */
16115 machine_mode mode;
16116 /* The size of the value. With iWMMXt enabled
16117 sizes > 4 also imply an alignment of 8-bytes. */
16118 int fix_size;
16121 struct minipool_fixup
16123 Mfix * next;
16124 rtx_insn * insn;
16125 HOST_WIDE_INT address;
16126 rtx * loc;
16127 machine_mode mode;
16128 int fix_size;
16129 rtx value;
16130 Mnode * minipool;
16131 HOST_WIDE_INT forwards;
16132 HOST_WIDE_INT backwards;
16135 /* Fixes less than a word need padding out to a word boundary. */
16136 #define MINIPOOL_FIX_SIZE(mode) \
16137 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
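/* For example, a QImode or HImode fixup still reserves 4 bytes in the
   pool, while 8- and 16-byte values reserve their natural size.  */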
16139 static Mnode * minipool_vector_head;
16140 static Mnode * minipool_vector_tail;
16141 static rtx_code_label *minipool_vector_label;
16142 static int minipool_pad;
16144 /* The linked list of all minipool fixes required for this function. */
16145 Mfix * minipool_fix_head;
16146 Mfix * minipool_fix_tail;
16147 /* The fix entry for the current minipool, once it has been placed. */
16148 Mfix * minipool_barrier;
16150 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16151 #define JUMP_TABLES_IN_TEXT_SECTION 0
16152 #endif
16154 static HOST_WIDE_INT
16155 get_jump_table_size (rtx_jump_table_data *insn)
16157 /* ADDR_VECs only take room if read-only data goes into the text
16158 section. */
16159 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16161 rtx body = PATTERN (insn);
16162 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16163 HOST_WIDE_INT size;
16164 HOST_WIDE_INT modesize;
16166 modesize = GET_MODE_SIZE (GET_MODE (body));
16167 size = modesize * XVECLEN (body, elt);
16168 switch (modesize)
16170 case 1:
16171 /* Round up size of TBB table to a halfword boundary. */
16172 size = (size + 1) & ~(HOST_WIDE_INT)1;
16173 break;
16174 case 2:
16175 /* No padding necessary for TBH. */
16176 break;
16177 case 4:
16178 /* Add two bytes for alignment on Thumb. */
16179 if (TARGET_THUMB)
16180 size += 2;
16181 break;
16182 default:
16183 gcc_unreachable ();
16185 return size;
16188 return 0;
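/* Illustrative sizes (only counted when jump tables live in the text
   section): a TBB-style table of 5 QImode entries is 5 bytes rounded up
   to 6; a 7-entry SImode table on Thumb is 28 bytes plus 2 bytes of
   alignment padding.  */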
16191 /* Return the maximum amount of padding that will be inserted before
16192 label LABEL. */
16194 static HOST_WIDE_INT
16195 get_label_padding (rtx label)
16197 HOST_WIDE_INT align, min_insn_size;
16199 align = 1 << label_to_alignment (label);
16200 min_insn_size = TARGET_THUMB ? 2 : 4;
16201 return align > min_insn_size ? align - min_insn_size : 0;
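/* E.g. a label aligned to an 8-byte boundary can be preceded by up to
   8 - 2 = 6 bytes of padding on Thumb, or 8 - 4 = 4 bytes on ARM.  */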
16204 /* Move a minipool fix MP from its current location to before MAX_MP.
16205 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16206 constraints may need updating. */
16207 static Mnode *
16208 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16209 HOST_WIDE_INT max_address)
16211 /* The code below assumes these are different. */
16212 gcc_assert (mp != max_mp);
16214 if (max_mp == NULL)
16216 if (max_address < mp->max_address)
16217 mp->max_address = max_address;
16219 else
16221 if (max_address > max_mp->max_address - mp->fix_size)
16222 mp->max_address = max_mp->max_address - mp->fix_size;
16223 else
16224 mp->max_address = max_address;
16226 /* Unlink MP from its current position. Since max_mp is non-null,
16227 mp->prev must be non-null. */
16228 mp->prev->next = mp->next;
16229 if (mp->next != NULL)
16230 mp->next->prev = mp->prev;
16231 else
16232 minipool_vector_tail = mp->prev;
16234 /* Re-insert it before MAX_MP. */
16235 mp->next = max_mp;
16236 mp->prev = max_mp->prev;
16237 max_mp->prev = mp;
16239 if (mp->prev != NULL)
16240 mp->prev->next = mp;
16241 else
16242 minipool_vector_head = mp;
16245 /* Save the new entry. */
16246 max_mp = mp;
16248 /* Scan over the preceding entries and adjust their addresses as
16249 required. */
16250 while (mp->prev != NULL
16251 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16253 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16254 mp = mp->prev;
16257 return max_mp;
16260 /* Add a constant to the minipool for a forward reference. Returns the
16261 node added or NULL if the constant will not fit in this pool. */
16262 static Mnode *
16263 add_minipool_forward_ref (Mfix *fix)
16265 /* If set, max_mp is the first pool_entry that has a lower
16266 constraint than the one we are trying to add. */
16267 Mnode * max_mp = NULL;
16268 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16269 Mnode * mp;
16271 /* If the minipool starts before the end of FIX->INSN then this FIX
16272 can not be placed into the current pool. Furthermore, adding the
16273 new constant pool entry may cause the pool to start FIX_SIZE bytes
16274 earlier. */
16275 if (minipool_vector_head &&
16276 (fix->address + get_attr_length (fix->insn)
16277 >= minipool_vector_head->max_address - fix->fix_size))
16278 return NULL;
16280 /* Scan the pool to see if a constant with the same value has
16281 already been added. While we are doing this, also note the
16282 location where we must insert the constant if it doesn't already
16283 exist. */
16284 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16286 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16287 && fix->mode == mp->mode
16288 && (!LABEL_P (fix->value)
16289 || (CODE_LABEL_NUMBER (fix->value)
16290 == CODE_LABEL_NUMBER (mp->value)))
16291 && rtx_equal_p (fix->value, mp->value))
16293 /* More than one fix references this entry. */
16294 mp->refcount++;
16295 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16298 /* Note the insertion point if necessary. */
16299 if (max_mp == NULL
16300 && mp->max_address > max_address)
16301 max_mp = mp;
16303 /* If we are inserting an 8-byte aligned quantity and
16304 we have not already found an insertion point, then
16305 make sure that all such 8-byte aligned quantities are
16306 placed at the start of the pool. */
16307 if (ARM_DOUBLEWORD_ALIGN
16308 && max_mp == NULL
16309 && fix->fix_size >= 8
16310 && mp->fix_size < 8)
16312 max_mp = mp;
16313 max_address = mp->max_address;
16317 /* The value is not currently in the minipool, so we need to create
16318 a new entry for it. If MAX_MP is NULL, the entry will be put on
16319 the end of the list since the placement is less constrained than
16320 any existing entry. Otherwise, we insert the new fix before
16321 MAX_MP and, if necessary, adjust the constraints on the other
16322 entries. */
16323 mp = XNEW (Mnode);
16324 mp->fix_size = fix->fix_size;
16325 mp->mode = fix->mode;
16326 mp->value = fix->value;
16327 mp->refcount = 1;
16328 /* Not yet required for a backwards ref. */
16329 mp->min_address = -65536;
16331 if (max_mp == NULL)
16333 mp->max_address = max_address;
16334 mp->next = NULL;
16335 mp->prev = minipool_vector_tail;
16337 if (mp->prev == NULL)
16339 minipool_vector_head = mp;
16340 minipool_vector_label = gen_label_rtx ();
16342 else
16343 mp->prev->next = mp;
16345 minipool_vector_tail = mp;
16347 else
16349 if (max_address > max_mp->max_address - mp->fix_size)
16350 mp->max_address = max_mp->max_address - mp->fix_size;
16351 else
16352 mp->max_address = max_address;
16354 mp->next = max_mp;
16355 mp->prev = max_mp->prev;
16356 max_mp->prev = mp;
16357 if (mp->prev != NULL)
16358 mp->prev->next = mp;
16359 else
16360 minipool_vector_head = mp;
16363 /* Save the new entry. */
16364 max_mp = mp;
16366 /* Scan over the preceding entries and adjust their addresses as
16367 required. */
16368 while (mp->prev != NULL
16369 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16371 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16372 mp = mp->prev;
16375 return max_mp;
16378 static Mnode *
16379 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16380 HOST_WIDE_INT min_address)
16382 HOST_WIDE_INT offset;
16384 /* The code below assumes these are different. */
16385 gcc_assert (mp != min_mp);
16387 if (min_mp == NULL)
16389 if (min_address > mp->min_address)
16390 mp->min_address = min_address;
16392 else
16394 /* We will adjust this below if it is too loose. */
16395 mp->min_address = min_address;
16397 /* Unlink MP from its current position. Since min_mp is non-null,
16398 mp->next must be non-null. */
16399 mp->next->prev = mp->prev;
16400 if (mp->prev != NULL)
16401 mp->prev->next = mp->next;
16402 else
16403 minipool_vector_head = mp->next;
16405 /* Reinsert it after MIN_MP. */
16406 mp->prev = min_mp;
16407 mp->next = min_mp->next;
16408 min_mp->next = mp;
16409 if (mp->next != NULL)
16410 mp->next->prev = mp;
16411 else
16412 minipool_vector_tail = mp;
16415 min_mp = mp;
16417 offset = 0;
16418 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16420 mp->offset = offset;
16421 if (mp->refcount > 0)
16422 offset += mp->fix_size;
16424 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16425 mp->next->min_address = mp->min_address + mp->fix_size;
16428 return min_mp;
16431 /* Add a constant to the minipool for a backward reference. Returns the
16432 node added or NULL if the constant will not fit in this pool.
16434 Note that the code for insertion for a backwards reference can be
16435 somewhat confusing because the calculated offsets for each fix do
16436 not take into account the size of the pool (which is still under
16437 construction). */
16438 static Mnode *
16439 add_minipool_backward_ref (Mfix *fix)
16441 /* If set, min_mp is the last pool_entry that has a lower constraint
16442 than the one we are trying to add. */
16443 Mnode *min_mp = NULL;
16444 /* This can be negative, since it is only a constraint. */
16445 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16446 Mnode *mp;
16448 /* If we can't reach the current pool from this insn, or if we can't
16449 insert this entry at the end of the pool without pushing other
16450 fixes out of range, then we don't try. This ensures that we
16451 can't fail later on. */
16452 if (min_address >= minipool_barrier->address
16453 || (minipool_vector_tail->min_address + fix->fix_size
16454 >= minipool_barrier->address))
16455 return NULL;
16457 /* Scan the pool to see if a constant with the same value has
16458 already been added. While we are doing this, also note the
16459 location where we must insert the constant if it doesn't already
16460 exist. */
16461 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16463 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16464 && fix->mode == mp->mode
16465 && (!LABEL_P (fix->value)
16466 || (CODE_LABEL_NUMBER (fix->value)
16467 == CODE_LABEL_NUMBER (mp->value)))
16468 && rtx_equal_p (fix->value, mp->value)
16469 /* Check that there is enough slack to move this entry to the
16470 end of the table (this is conservative). */
16471 && (mp->max_address
16472 > (minipool_barrier->address
16473 + minipool_vector_tail->offset
16474 + minipool_vector_tail->fix_size)))
16476 mp->refcount++;
16477 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16480 if (min_mp != NULL)
16481 mp->min_address += fix->fix_size;
16482 else
16484 /* Note the insertion point if necessary. */
16485 if (mp->min_address < min_address)
16487 /* For now, we do not allow the insertion of nodes that require
16488 8-byte alignment anywhere but at the start of the pool. */
16489 if (ARM_DOUBLEWORD_ALIGN
16490 && fix->fix_size >= 8 && mp->fix_size < 8)
16491 return NULL;
16492 else
16493 min_mp = mp;
16495 else if (mp->max_address
16496 < minipool_barrier->address + mp->offset + fix->fix_size)
16498 /* Inserting before this entry would push the fix beyond
16499 its maximum address (which can happen if we have
16500 re-located a forwards fix); force the new fix to come
16501 after it. */
16502 if (ARM_DOUBLEWORD_ALIGN
16503 && fix->fix_size >= 8 && mp->fix_size < 8)
16504 return NULL;
16505 else
16507 min_mp = mp;
16508 min_address = mp->min_address + fix->fix_size;
16511 /* Do not insert a non-8-byte aligned quantity before 8-byte
16512 aligned quantities. */
16513 else if (ARM_DOUBLEWORD_ALIGN
16514 && fix->fix_size < 8
16515 && mp->fix_size >= 8)
16517 min_mp = mp;
16518 min_address = mp->min_address + fix->fix_size;
16523 /* We need to create a new entry. */
16524 mp = XNEW (Mnode);
16525 mp->fix_size = fix->fix_size;
16526 mp->mode = fix->mode;
16527 mp->value = fix->value;
16528 mp->refcount = 1;
16529 mp->max_address = minipool_barrier->address + 65536;
16531 mp->min_address = min_address;
16533 if (min_mp == NULL)
16535 mp->prev = NULL;
16536 mp->next = minipool_vector_head;
16538 if (mp->next == NULL)
16540 minipool_vector_tail = mp;
16541 minipool_vector_label = gen_label_rtx ();
16543 else
16544 mp->next->prev = mp;
16546 minipool_vector_head = mp;
16548 else
16550 mp->next = min_mp->next;
16551 mp->prev = min_mp;
16552 min_mp->next = mp;
16554 if (mp->next != NULL)
16555 mp->next->prev = mp;
16556 else
16557 minipool_vector_tail = mp;
16560 /* Save the new entry. */
16561 min_mp = mp;
16563 if (mp->prev)
16564 mp = mp->prev;
16565 else
16566 mp->offset = 0;
16568 /* Scan over the following entries and adjust their offsets. */
16569 while (mp->next != NULL)
16571 if (mp->next->min_address < mp->min_address + mp->fix_size)
16572 mp->next->min_address = mp->min_address + mp->fix_size;
16574 if (mp->refcount)
16575 mp->next->offset = mp->offset + mp->fix_size;
16576 else
16577 mp->next->offset = mp->offset;
16579 mp = mp->next;
16582 return min_mp;
16585 static void
16586 assign_minipool_offsets (Mfix *barrier)
16588 HOST_WIDE_INT offset = 0;
16589 Mnode *mp;
16591 minipool_barrier = barrier;
16593 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16595 mp->offset = offset;
16597 if (mp->refcount > 0)
16598 offset += mp->fix_size;
16602 /* Output the literal table */
16603 static void
16604 dump_minipool (rtx_insn *scan)
16606 Mnode * mp;
16607 Mnode * nmp;
16608 int align64 = 0;
16610 if (ARM_DOUBLEWORD_ALIGN)
16611 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16612 if (mp->refcount > 0 && mp->fix_size >= 8)
16614 align64 = 1;
16615 break;
16618 if (dump_file)
16619 fprintf (dump_file,
16620 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16621 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16623 scan = emit_label_after (gen_label_rtx (), scan);
16624 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16625 scan = emit_label_after (minipool_vector_label, scan);
16627 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16629 if (mp->refcount > 0)
16631 if (dump_file)
16633 fprintf (dump_file,
16634 ";; Offset %u, min %ld, max %ld ",
16635 (unsigned) mp->offset, (unsigned long) mp->min_address,
16636 (unsigned long) mp->max_address);
16637 arm_print_value (dump_file, mp->value);
16638 fputc ('\n', dump_file);
16641 switch (GET_MODE_SIZE (mp->mode))
16643 #ifdef HAVE_consttable_1
16644 case 1:
16645 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16646 break;
16648 #endif
16649 #ifdef HAVE_consttable_2
16650 case 2:
16651 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16652 break;
16654 #endif
16655 #ifdef HAVE_consttable_4
16656 case 4:
16657 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16658 break;
16660 #endif
16661 #ifdef HAVE_consttable_8
16662 case 8:
16663 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16664 break;
16666 #endif
16667 #ifdef HAVE_consttable_16
16668 case 16:
16669 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16670 break;
16672 #endif
16673 default:
16674 gcc_unreachable ();
16678 nmp = mp->next;
16679 free (mp);
16682 minipool_vector_head = minipool_vector_tail = NULL;
16683 scan = emit_insn_after (gen_consttable_end (), scan);
16684 scan = emit_barrier_after (scan);
16687 /* Return the cost of forcibly inserting a barrier after INSN. */
16688 static int
16689 arm_barrier_cost (rtx insn)
16691 /* Basing the location of the pool on the loop depth is preferable,
16692 but at the moment, the basic block information seems to be
16693 corrupt by this stage of the compilation. */
16694 int base_cost = 50;
16695 rtx next = next_nonnote_insn (insn);
16697 if (next != NULL && LABEL_P (next))
16698 base_cost -= 20;
16700 switch (GET_CODE (insn))
16702 case CODE_LABEL:
16703 /* It will always be better to place the table before the label, rather
16704 than after it. */
16705 return 50;
16707 case INSN:
16708 case CALL_INSN:
16709 return base_cost;
16711 case JUMP_INSN:
16712 return base_cost - 10;
16714 default:
16715 return base_cost + 10;
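/* Illustrative costs from the rules above: a JUMP_INSN normally costs
   50 - 10 = 40, and if the following insn is a label the base drops by
   20, so an unconditional branch just before a label costs only 20 and
   becomes the preferred place to force a new pool.  */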
16719 /* Find the best place in the insn stream in the range
16720 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16721 Create the barrier by inserting a jump and add a new fix entry for
16722 it. */
16723 static Mfix *
16724 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16726 HOST_WIDE_INT count = 0;
16727 rtx_barrier *barrier;
16728 rtx_insn *from = fix->insn;
16729 /* The instruction after which we will insert the jump. */
16730 rtx_insn *selected = NULL;
16731 int selected_cost;
16732 /* The address at which the jump instruction will be placed. */
16733 HOST_WIDE_INT selected_address;
16734 Mfix * new_fix;
16735 HOST_WIDE_INT max_count = max_address - fix->address;
16736 rtx_code_label *label = gen_label_rtx ();
16738 selected_cost = arm_barrier_cost (from);
16739 selected_address = fix->address;
16741 while (from && count < max_count)
16743 rtx_jump_table_data *tmp;
16744 int new_cost;
16746 /* This code shouldn't have been called if there was a natural barrier
16747 within range. */
16748 gcc_assert (!BARRIER_P (from));
16750 /* Count the length of this insn. This must stay in sync with the
16751 code that pushes minipool fixes. */
16752 if (LABEL_P (from))
16753 count += get_label_padding (from);
16754 else
16755 count += get_attr_length (from);
16757 /* If there is a jump table, add its length. */
16758 if (tablejump_p (from, NULL, &tmp))
16760 count += get_jump_table_size (tmp);
16762 /* Jump tables aren't in a basic block, so base the cost on
16763 the dispatch insn. If we select this location, we will
16764 still put the pool after the table. */
16765 new_cost = arm_barrier_cost (from);
16767 if (count < max_count
16768 && (!selected || new_cost <= selected_cost))
16770 selected = tmp;
16771 selected_cost = new_cost;
16772 selected_address = fix->address + count;
16775 /* Continue after the dispatch table. */
16776 from = NEXT_INSN (tmp);
16777 continue;
16780 new_cost = arm_barrier_cost (from);
16782 if (count < max_count
16783 && (!selected || new_cost <= selected_cost))
16785 selected = from;
16786 selected_cost = new_cost;
16787 selected_address = fix->address + count;
16790 from = NEXT_INSN (from);
16793 /* Make sure that we found a place to insert the jump. */
16794 gcc_assert (selected);
16796 /* Make sure we do not split a call and its corresponding
16797 CALL_ARG_LOCATION note. */
16798 if (CALL_P (selected))
16800 rtx_insn *next = NEXT_INSN (selected);
16801 if (next && NOTE_P (next)
16802 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16803 selected = next;
16806 /* Create a new JUMP_INSN that branches around a barrier. */
16807 from = emit_jump_insn_after (gen_jump (label), selected);
16808 JUMP_LABEL (from) = label;
16809 barrier = emit_barrier_after (from);
16810 emit_label_after (label, barrier);
16812 /* Create a minipool barrier entry for the new barrier. */
16813 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16814 new_fix->insn = barrier;
16815 new_fix->address = selected_address;
16816 new_fix->next = fix->next;
16817 fix->next = new_fix;
16819 return new_fix;
16822 /* Record that there is a natural barrier in the insn stream at
16823 ADDRESS. */
16824 static void
16825 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16827 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16829 fix->insn = insn;
16830 fix->address = address;
16832 fix->next = NULL;
16833 if (minipool_fix_head != NULL)
16834 minipool_fix_tail->next = fix;
16835 else
16836 minipool_fix_head = fix;
16838 minipool_fix_tail = fix;
16841 /* Record INSN, which will need fixing up to load a value from the
16842 minipool. ADDRESS is the offset of the insn since the start of the
16843 function; LOC is a pointer to the part of the insn which requires
16844 fixing; VALUE is the constant that must be loaded, which is of type
16845 MODE. */
16846 static void
16847 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16848 machine_mode mode, rtx value)
16850 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16852 fix->insn = insn;
16853 fix->address = address;
16854 fix->loc = loc;
16855 fix->mode = mode;
16856 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16857 fix->value = value;
16858 fix->forwards = get_attr_pool_range (insn);
16859 fix->backwards = get_attr_neg_pool_range (insn);
16860 fix->minipool = NULL;
16862 /* If an insn doesn't have a range defined for it, then it isn't
16863 expecting to be reworked by this code. Better to stop now than
16864 to generate duff assembly code. */
16865 gcc_assert (fix->forwards || fix->backwards);
16867 /* If an entry requires 8-byte alignment then assume all constant pools
16868 require 4 bytes of padding. Trying to do this later on a per-pool
16869 basis is awkward because existing pool entries have to be modified. */
16870 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16871 minipool_pad = 4;
16873 if (dump_file)
16875 fprintf (dump_file,
16876 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16877 GET_MODE_NAME (mode),
16878 INSN_UID (insn), (unsigned long) address,
16879 -1 * (long)fix->backwards, (long)fix->forwards);
16880 arm_print_value (dump_file, fix->value);
16881 fprintf (dump_file, "\n");
16884 /* Add it to the chain of fixes. */
16885 fix->next = NULL;
16887 if (minipool_fix_head != NULL)
16888 minipool_fix_tail->next = fix;
16889 else
16890 minipool_fix_head = fix;
16892 minipool_fix_tail = fix;
16895 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16896 Returns the number of insns needed, or 99 if we always want to synthesize
16897 the value. */
16898 int
16899 arm_max_const_double_inline_cost ()
16901 /* Let the value get synthesized to avoid the use of literal pools. */
16902 if (arm_disable_literal_pool)
16903 return 99;
16905 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16908 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16909 Returns the number of insns needed, or 99 if we don't know how to
16910 do it. */
16911 int
16912 arm_const_double_inline_cost (rtx val)
16914 rtx lowpart, highpart;
16915 machine_mode mode;
16917 mode = GET_MODE (val);
16919 if (mode == VOIDmode)
16920 mode = DImode;
16922 gcc_assert (GET_MODE_SIZE (mode) == 8);
16924 lowpart = gen_lowpart (SImode, val);
16925 highpart = gen_highpart_mode (SImode, mode, val);
16927 gcc_assert (CONST_INT_P (lowpart));
16928 gcc_assert (CONST_INT_P (highpart));
16930 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16931 NULL_RTX, NULL_RTX, 0, 0)
16932 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16933 NULL_RTX, NULL_RTX, 0, 0));
16936 /* Cost of loading a SImode constant. */
16937 static inline int
16938 arm_const_inline_cost (enum rtx_code code, rtx val)
16940 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16941 NULL_RTX, NULL_RTX, 1, 0);
16944 /* Return true if it is worthwhile to split a 64-bit constant into two
16945 32-bit operations. This is the case if optimizing for size, or
16946 if we have load delay slots, or if one 32-bit part can be done with
16947 a single data operation. */
16948 bool
16949 arm_const_double_by_parts (rtx val)
16951 machine_mode mode = GET_MODE (val);
16952 rtx part;
16954 if (optimize_size || arm_ld_sched)
16955 return true;
16957 if (mode == VOIDmode)
16958 mode = DImode;
16960 part = gen_highpart_mode (SImode, mode, val);
16962 gcc_assert (CONST_INT_P (part));
16964 if (const_ok_for_arm (INTVAL (part))
16965 || const_ok_for_arm (~INTVAL (part)))
16966 return true;
16968 part = gen_lowpart (SImode, val);
16970 gcc_assert (CONST_INT_P (part));
16972 if (const_ok_for_arm (INTVAL (part))
16973 || const_ok_for_arm (~INTVAL (part)))
16974 return true;
16976 return false;
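/* For example (illustrative constant): 0x000000ff00000001 is worth
   splitting, since its high word 0x000000ff is a valid ARM immediate and
   that half can therefore be built with a single data processing insn.  */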
16979 /* Return true if it is possible to inline both the high and low parts
16980 of a 64-bit constant into 32-bit data processing instructions. */
16981 bool
16982 arm_const_double_by_immediates (rtx val)
16984 machine_mode mode = GET_MODE (val);
16985 rtx part;
16987 if (mode == VOIDmode)
16988 mode = DImode;
16990 part = gen_highpart_mode (SImode, mode, val);
16992 gcc_assert (CONST_INT_P (part));
16994 if (!const_ok_for_arm (INTVAL (part)))
16995 return false;
16997 part = gen_lowpart (SImode, val);
16999 gcc_assert (CONST_INT_P (part));
17001 if (!const_ok_for_arm (INTVAL (part)))
17002 return false;
17004 return true;
17007 /* Scan INSN and note any of its operands that need fixing.
17008 If DO_PUSHES is false we do not actually push any of the fixups
17009 needed. */
17010 static void
17011 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17013 int opno;
17015 extract_constrain_insn (insn);
17017 if (recog_data.n_alternatives == 0)
17018 return;
17020 /* Fill in recog_op_alt with information about the constraints of
17021 this insn. */
17022 preprocess_constraints (insn);
17024 const operand_alternative *op_alt = which_op_alt ();
17025 for (opno = 0; opno < recog_data.n_operands; opno++)
17027 /* Things we need to fix can only occur in inputs. */
17028 if (recog_data.operand_type[opno] != OP_IN)
17029 continue;
17031 /* If this alternative is a memory reference, then any mention
17032 of constants in this alternative is really to fool reload
17033 into allowing us to accept one there. We need to fix them up
17034 now so that we output the right code. */
17035 if (op_alt[opno].memory_ok)
17037 rtx op = recog_data.operand[opno];
17039 if (CONSTANT_P (op))
17041 if (do_pushes)
17042 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17043 recog_data.operand_mode[opno], op);
17045 else if (MEM_P (op)
17046 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17047 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17049 if (do_pushes)
17051 rtx cop = avoid_constant_pool_reference (op);
17053 /* Casting the address of something to a mode narrower
17054 than a word can cause avoid_constant_pool_reference()
17055 to return the pool reference itself. That's no good to
17056 us here. Let's just hope that we can use the
17057 constant pool value directly. */
17058 if (op == cop)
17059 cop = get_pool_constant (XEXP (op, 0));
17061 push_minipool_fix (insn, address,
17062 recog_data.operand_loc[opno],
17063 recog_data.operand_mode[opno], cop);
17070 return;
17073 /* Rewrite move insn into subtract of 0 if the condition codes will
17074 be useful in next conditional jump insn. */
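/* For instance (illustrative): when a low-register move such as
   mov r3, r2
   is followed in the same basic block by a cbranchsi4_insn comparing r2
   (or r3) with zero, the move is rewritten as
   subs r3, r2, #0
   and the branch is changed to test r3, so the flags set by the subtract
   make the separate compare redundant.  */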
17076 static void
17077 thumb1_reorg (void)
17079 basic_block bb;
17081 FOR_EACH_BB_FN (bb, cfun)
17083 rtx dest, src;
17084 rtx pat, op0, set = NULL;
17085 rtx_insn *prev, *insn = BB_END (bb);
17086 bool insn_clobbered = false;
17088 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17089 insn = PREV_INSN (insn);
17091 /* Find the last cbranchsi4_insn in basic block BB. */
17092 if (insn == BB_HEAD (bb)
17093 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17094 continue;
17096 /* Get the register with which we are comparing. */
17097 pat = PATTERN (insn);
17098 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17100 /* Find the first flag setting insn before INSN in basic block BB. */
17101 gcc_assert (insn != BB_HEAD (bb));
17102 for (prev = PREV_INSN (insn);
17103 (!insn_clobbered
17104 && prev != BB_HEAD (bb)
17105 && (NOTE_P (prev)
17106 || DEBUG_INSN_P (prev)
17107 || ((set = single_set (prev)) != NULL
17108 && get_attr_conds (prev) == CONDS_NOCOND)));
17109 prev = PREV_INSN (prev))
17111 if (reg_set_p (op0, prev))
17112 insn_clobbered = true;
17115 /* Skip if op0 is clobbered by insn other than prev. */
17116 if (insn_clobbered)
17117 continue;
17119 if (!set)
17120 continue;
17122 dest = SET_DEST (set);
17123 src = SET_SRC (set);
17124 if (!low_register_operand (dest, SImode)
17125 || !low_register_operand (src, SImode))
17126 continue;
17128 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17129 in INSN. Both src and dest of the move insn are checked. */
17130 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17132 dest = copy_rtx (dest);
17133 src = copy_rtx (src);
17134 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17135 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17136 INSN_CODE (prev) = -1;
17137 /* Set test register in INSN to dest. */
17138 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17139 INSN_CODE (insn) = -1;
17144 /* Convert instructions to their cc-clobbering variant if possible, since
17145 that allows us to use smaller encodings. */
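/* A sketch of the transformation (illustrative, not from the sources):
   when the condition codes are dead after the insn,
   add r0, r1, r2 (32-bit Thumb-2 encoding)
   is rewritten as the flag-setting
   adds r0, r1, r2 (16-bit encoding)
   by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM.  */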
17147 static void
17148 thumb2_reorg (void)
17150 basic_block bb;
17151 regset_head live;
17153 INIT_REG_SET (&live);
17155 /* We are freeing block_for_insn in the toplev to keep compatibility
17156 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17157 compute_bb_for_insn ();
17158 df_analyze ();
17160 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17162 FOR_EACH_BB_FN (bb, cfun)
17164 if (current_tune->disparage_flag_setting_t16_encodings
17165 && optimize_bb_for_speed_p (bb))
17166 continue;
17168 rtx_insn *insn;
17169 Convert_Action action = SKIP;
17170 Convert_Action action_for_partial_flag_setting
17171 = (current_tune->disparage_partial_flag_setting_t16_encodings
17172 && optimize_bb_for_speed_p (bb))
17173 ? SKIP : CONV;
17175 COPY_REG_SET (&live, DF_LR_OUT (bb));
17176 df_simulate_initialize_backwards (bb, &live);
17177 FOR_BB_INSNS_REVERSE (bb, insn)
17179 if (NONJUMP_INSN_P (insn)
17180 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17181 && GET_CODE (PATTERN (insn)) == SET)
17183 action = SKIP;
17184 rtx pat = PATTERN (insn);
17185 rtx dst = XEXP (pat, 0);
17186 rtx src = XEXP (pat, 1);
17187 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17189 if (!OBJECT_P (src))
17190 op0 = XEXP (src, 0);
17192 if (BINARY_P (src))
17193 op1 = XEXP (src, 1);
17195 if (low_register_operand (dst, SImode))
17197 switch (GET_CODE (src))
17199 case PLUS:
17200 /* Adding two registers and storing the result
17201 in the first source is already a 16-bit
17202 operation. */
17203 if (rtx_equal_p (dst, op0)
17204 && register_operand (op1, SImode))
17205 break;
17207 if (low_register_operand (op0, SImode))
17209 /* ADDS <Rd>,<Rn>,<Rm> */
17210 if (low_register_operand (op1, SImode))
17211 action = CONV;
17212 /* ADDS <Rdn>,#<imm8> */
17213 /* SUBS <Rdn>,#<imm8> */
17214 else if (rtx_equal_p (dst, op0)
17215 && CONST_INT_P (op1)
17216 && IN_RANGE (INTVAL (op1), -255, 255))
17217 action = CONV;
17218 /* ADDS <Rd>,<Rn>,#<imm3> */
17219 /* SUBS <Rd>,<Rn>,#<imm3> */
17220 else if (CONST_INT_P (op1)
17221 && IN_RANGE (INTVAL (op1), -7, 7))
17222 action = CONV;
17224 /* ADCS <Rd>, <Rn> */
17225 else if (GET_CODE (XEXP (src, 0)) == PLUS
17226 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17227 && low_register_operand (XEXP (XEXP (src, 0), 1),
17228 SImode)
17229 && COMPARISON_P (op1)
17230 && cc_register (XEXP (op1, 0), VOIDmode)
17231 && maybe_get_arm_condition_code (op1) == ARM_CS
17232 && XEXP (op1, 1) == const0_rtx)
17233 action = CONV;
17234 break;
17236 case MINUS:
17237 /* RSBS <Rd>,<Rn>,#0
17238 Not handled here: see NEG below. */
17239 /* SUBS <Rd>,<Rn>,#<imm3>
17240 SUBS <Rdn>,#<imm8>
17241 Not handled here: see PLUS above. */
17242 /* SUBS <Rd>,<Rn>,<Rm> */
17243 if (low_register_operand (op0, SImode)
17244 && low_register_operand (op1, SImode))
17245 action = CONV;
17246 break;
17248 case MULT:
17249 /* MULS <Rdm>,<Rn>,<Rdm>
17250 As an exception to the rule, this is only used
17251 when optimizing for size since MULS is slow on all
17252 known implementations. We do not even want to use
17253 MULS in cold code, if optimizing for speed, so we
17254 test the global flag here. */
17255 if (!optimize_size)
17256 break;
17257 /* else fall through. */
17258 case AND:
17259 case IOR:
17260 case XOR:
17261 /* ANDS <Rdn>,<Rm> */
17262 if (rtx_equal_p (dst, op0)
17263 && low_register_operand (op1, SImode))
17264 action = action_for_partial_flag_setting;
17265 else if (rtx_equal_p (dst, op1)
17266 && low_register_operand (op0, SImode))
17267 action = action_for_partial_flag_setting == SKIP
17268 ? SKIP : SWAP_CONV;
17269 break;
17271 case ASHIFTRT:
17272 case ASHIFT:
17273 case LSHIFTRT:
17274 /* ASRS <Rdn>,<Rm> */
17275 /* LSRS <Rdn>,<Rm> */
17276 /* LSLS <Rdn>,<Rm> */
17277 if (rtx_equal_p (dst, op0)
17278 && low_register_operand (op1, SImode))
17279 action = action_for_partial_flag_setting;
17280 /* ASRS <Rd>,<Rm>,#<imm5> */
17281 /* LSRS <Rd>,<Rm>,#<imm5> */
17282 /* LSLS <Rd>,<Rm>,#<imm5> */
17283 else if (low_register_operand (op0, SImode)
17284 && CONST_INT_P (op1)
17285 && IN_RANGE (INTVAL (op1), 0, 31))
17286 action = action_for_partial_flag_setting;
17287 break;
17289 case ROTATERT:
17290 /* RORS <Rdn>,<Rm> */
17291 if (rtx_equal_p (dst, op0)
17292 && low_register_operand (op1, SImode))
17293 action = action_for_partial_flag_setting;
17294 break;
17296 case NOT:
17297 /* MVNS <Rd>,<Rm> */
17298 if (low_register_operand (op0, SImode))
17299 action = action_for_partial_flag_setting;
17300 break;
17302 case NEG:
17303 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17304 if (low_register_operand (op0, SImode))
17305 action = CONV;
17306 break;
17308 case CONST_INT:
17309 /* MOVS <Rd>,#<imm8> */
17310 if (CONST_INT_P (src)
17311 && IN_RANGE (INTVAL (src), 0, 255))
17312 action = action_for_partial_flag_setting;
17313 break;
17315 case REG:
17316 /* MOVS and MOV<c> with registers have different
17317 encodings, so are not relevant here. */
17318 break;
17320 default:
17321 break;
17325 if (action != SKIP)
17327 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17328 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17329 rtvec vec;
17331 if (action == SWAP_CONV)
17333 src = copy_rtx (src);
17334 XEXP (src, 0) = op1;
17335 XEXP (src, 1) = op0;
17336 pat = gen_rtx_SET (VOIDmode, dst, src);
17337 vec = gen_rtvec (2, pat, clobber);
17339 else /* action == CONV */
17340 vec = gen_rtvec (2, pat, clobber);
17342 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17343 INSN_CODE (insn) = -1;
17347 if (NONDEBUG_INSN_P (insn))
17348 df_simulate_one_insn_backwards (bb, insn, &live);
17352 CLEAR_REG_SET (&live);
17355 /* GCC puts the pool in the wrong place for ARM, since we can only
17356 load addresses a limited distance around the pc. We do some
17357 special munging to move the constant pool values to the correct
17358 point in the code. */
17359 static void
17360 arm_reorg (void)
17362 rtx_insn *insn;
17363 HOST_WIDE_INT address = 0;
17364 Mfix * fix;
17366 if (TARGET_THUMB1)
17367 thumb1_reorg ();
17368 else if (TARGET_THUMB2)
17369 thumb2_reorg ();
17371 /* Ensure all insns that must be split have been split at this point.
17372 Otherwise, the pool placement code below may compute incorrect
17373 insn lengths. Note that when optimizing, all insns have already
17374 been split at this point. */
17375 if (!optimize)
17376 split_all_insns_noflow ();
17378 minipool_fix_head = minipool_fix_tail = NULL;
17380 /* The first insn must always be a note, or the code below won't
17381 scan it properly. */
17382 insn = get_insns ();
17383 gcc_assert (NOTE_P (insn));
17384 minipool_pad = 0;
17386 /* Scan all the insns and record the operands that will need fixing. */
17387 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17389 if (BARRIER_P (insn))
17390 push_minipool_barrier (insn, address);
17391 else if (INSN_P (insn))
17393 rtx_jump_table_data *table;
17395 note_invalid_constants (insn, address, true);
17396 address += get_attr_length (insn);
17398 /* If the insn is a vector jump, add the size of the table
17399 and skip the table. */
17400 if (tablejump_p (insn, NULL, &table))
17402 address += get_jump_table_size (table);
17403 insn = table;
17406 else if (LABEL_P (insn))
17407 /* Add the worst-case padding due to alignment. We don't add
17408 the _current_ padding because the minipool insertions
17409 themselves might change it. */
17410 address += get_label_padding (insn);
17413 fix = minipool_fix_head;
17415 /* Now scan the fixups and perform the required changes. */
17416 while (fix)
17418 Mfix * ftmp;
17419 Mfix * fdel;
17420 Mfix * last_added_fix;
17421 Mfix * last_barrier = NULL;
17422 Mfix * this_fix;
17424 /* Skip any further barriers before the next fix. */
17425 while (fix && BARRIER_P (fix->insn))
17426 fix = fix->next;
17428 /* No more fixes. */
17429 if (fix == NULL)
17430 break;
17432 last_added_fix = NULL;
17434 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17436 if (BARRIER_P (ftmp->insn))
17438 if (ftmp->address >= minipool_vector_head->max_address)
17439 break;
17441 last_barrier = ftmp;
17443 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17444 break;
17446 last_added_fix = ftmp; /* Keep track of the last fix added. */
17449 /* If we found a barrier, drop back to that; any fixes that we
17450 could have reached but come after the barrier will now go in
17451 the next mini-pool. */
17452 if (last_barrier != NULL)
17454 /* Reduce the refcount for those fixes that won't go into this
17455 pool after all. */
17456 for (fdel = last_barrier->next;
17457 fdel && fdel != ftmp;
17458 fdel = fdel->next)
17460 fdel->minipool->refcount--;
17461 fdel->minipool = NULL;
17464 ftmp = last_barrier;
17466 else
17468 /* ftmp is the first fix that we can't fit into this pool and
17469 there are no natural barriers that we could use. Insert a
17470 new barrier in the code somewhere between the previous
17471 fix and this one, and arrange to jump around it. */
17472 HOST_WIDE_INT max_address;
17474 /* The last item on the list of fixes must be a barrier, so
17475 we can never run off the end of the list of fixes without
17476 last_barrier being set. */
17477 gcc_assert (ftmp);
17479 max_address = minipool_vector_head->max_address;
17480 /* Check that there isn't another fix that is in range that
17481 we couldn't fit into this pool because the pool was
17482 already too large: we need to put the pool before such an
17483 instruction. The pool itself may come just after the
17484 fix because create_fix_barrier also allows space for a
17485 jump instruction. */
17486 if (ftmp->address < max_address)
17487 max_address = ftmp->address + 1;
17489 last_barrier = create_fix_barrier (last_added_fix, max_address);
17492 assign_minipool_offsets (last_barrier);
17494 while (ftmp)
17496 if (!BARRIER_P (ftmp->insn)
17497 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17498 == NULL))
17499 break;
17501 ftmp = ftmp->next;
17504 /* Scan over the fixes we have identified for this pool, fixing them
17505 up and adding the constants to the pool itself. */
17506 for (this_fix = fix; this_fix && ftmp != this_fix;
17507 this_fix = this_fix->next)
17508 if (!BARRIER_P (this_fix->insn))
17510 rtx addr
17511 = plus_constant (Pmode,
17512 gen_rtx_LABEL_REF (VOIDmode,
17513 minipool_vector_label),
17514 this_fix->minipool->offset);
17515 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17518 dump_minipool (last_barrier->insn);
17519 fix = ftmp;
17522 /* From now on we must synthesize any constants that we can't handle
17523 directly. This can happen if the RTL gets split during final
17524 instruction generation. */
17525 cfun->machine->after_arm_reorg = 1;
17527 /* Free the minipool memory. */
17528 obstack_free (&minipool_obstack, minipool_startobj);
17531 /* Routines to output assembly language. */
17533 /* Return string representation of passed in real value. */
17534 static const char *
17535 fp_const_from_val (REAL_VALUE_TYPE *r)
17537 if (!fp_consts_inited)
17538 init_fp_table ();
17540 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17541 return "0";
17544 /* OPERANDS[0] is the entire list of insns that constitute pop;
17545 OPERANDS[1] is the base register; RETURN_PC is true iff the return insn
17546 is in the list; UPDATE is true iff the list contains an explicit
17547 update of the base register. */
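/* As a hedged example of the strings built below: popping r4, r5 and pc
   through SP with writeback produces "pop {r4, r5, pc}" under unified
   syntax, or "ldmfd sp!, {r4, r5, pc}" otherwise; a return from an
   interrupt handler additionally gets a trailing "^".  */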
17548 void
17549 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17550 bool update)
17552 int i;
17553 char pattern[100];
17554 int offset;
17555 const char *conditional;
17556 int num_saves = XVECLEN (operands[0], 0);
17557 unsigned int regno;
17558 unsigned int regno_base = REGNO (operands[1]);
17560 offset = 0;
17561 offset += update ? 1 : 0;
17562 offset += return_pc ? 1 : 0;
17564 /* Is the base register in the list? */
17565 for (i = offset; i < num_saves; i++)
17567 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17568 /* If SP is in the list, then the base register must be SP. */
17569 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17570 /* If base register is in the list, there must be no explicit update. */
17571 if (regno == regno_base)
17572 gcc_assert (!update);
17575 conditional = reverse ? "%?%D0" : "%?%d0";
17576 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17578 /* Output pop (not stmfd) because it has a shorter encoding. */
17579 gcc_assert (update);
17580 sprintf (pattern, "pop%s\t{", conditional);
17582 else
17584 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17585 It's just a convention; their semantics are identical. */
17586 if (regno_base == SP_REGNUM)
17587 sprintf (pattern, "ldm%sfd\t", conditional);
17588 else if (TARGET_UNIFIED_ASM)
17589 sprintf (pattern, "ldmia%s\t", conditional);
17590 else
17591 sprintf (pattern, "ldm%sia\t", conditional);
17593 strcat (pattern, reg_names[regno_base]);
17594 if (update)
17595 strcat (pattern, "!, {");
17596 else
17597 strcat (pattern, ", {");
17600 /* Output the first destination register. */
17601 strcat (pattern,
17602 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17604 /* Output the rest of the destination registers. */
17605 for (i = offset + 1; i < num_saves; i++)
17607 strcat (pattern, ", ");
17608 strcat (pattern,
17609 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17612 strcat (pattern, "}");
17614 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17615 strcat (pattern, "^");
17617 output_asm_insn (pattern, &cond);
17621 /* Output the assembly for a store multiple. */
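/* For example (registers chosen arbitrarily): storing d8-d10 with SP as
   the base produces "vpush.64 {d8, d9, d10}", while any other base
   register produces "vstmdb.64 r5!, {d8, d9, d10}".  */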
17623 const char *
17624 vfp_output_vstmd (rtx * operands)
17626 char pattern[100];
17627 int p;
17628 int base;
17629 int i;
17630 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17631 ? XEXP (operands[0], 0)
17632 : XEXP (XEXP (operands[0], 0), 0);
17633 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17635 if (push_p)
17636 strcpy (pattern, "vpush%?.64\t{%P1");
17637 else
17638 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17640 p = strlen (pattern);
17642 gcc_assert (REG_P (operands[1]));
17644 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17645 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17647 p += sprintf (&pattern[p], ", d%d", base + i);
17649 strcpy (&pattern[p], "}");
17651 output_asm_insn (pattern, operands);
17652 return "";
17656 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17657 number of bytes pushed. */
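/* A rough sketch of the two quirks handled below: on pre-v6 cores a
   request to push exactly two D registers is widened to three (the ARM10
   VFPr1 erratum), and a request for more than 16 is split recursively,
   e.g. a count of 20 becomes one FSTMD of 4 registers followed by one
   of 16.  */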
17659 static int
17660 vfp_emit_fstmd (int base_reg, int count)
17662 rtx par;
17663 rtx dwarf;
17664 rtx tmp, reg;
17665 int i;
17667 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17668 register pairs are stored by a store multiple insn. We avoid this
17669 by pushing an extra pair. */
17670 if (count == 2 && !arm_arch6)
17672 if (base_reg == LAST_VFP_REGNUM - 3)
17673 base_reg -= 2;
17674 count++;
17677 /* FSTMD may not store more than 16 doubleword registers at once. Split
17678 larger stores into multiple parts (up to a maximum of two, in
17679 practice). */
17680 if (count > 16)
17682 int saved;
17683 /* NOTE: base_reg is an internal register number, so each D register
17684 counts as 2. */
17685 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17686 saved += vfp_emit_fstmd (base_reg, 16);
17687 return saved;
17690 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17691 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17693 reg = gen_rtx_REG (DFmode, base_reg);
17694 base_reg += 2;
17696 XVECEXP (par, 0, 0)
17697 = gen_rtx_SET (VOIDmode,
17698 gen_frame_mem
17699 (BLKmode,
17700 gen_rtx_PRE_MODIFY (Pmode,
17701 stack_pointer_rtx,
17702 plus_constant
17703 (Pmode, stack_pointer_rtx,
17704 - (count * 8)))
17706 gen_rtx_UNSPEC (BLKmode,
17707 gen_rtvec (1, reg),
17708 UNSPEC_PUSH_MULT));
17710 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17711 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17712 RTX_FRAME_RELATED_P (tmp) = 1;
17713 XVECEXP (dwarf, 0, 0) = tmp;
17715 tmp = gen_rtx_SET (VOIDmode,
17716 gen_frame_mem (DFmode, stack_pointer_rtx),
17717 reg);
17718 RTX_FRAME_RELATED_P (tmp) = 1;
17719 XVECEXP (dwarf, 0, 1) = tmp;
17721 for (i = 1; i < count; i++)
17723 reg = gen_rtx_REG (DFmode, base_reg);
17724 base_reg += 2;
17725 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17727 tmp = gen_rtx_SET (VOIDmode,
17728 gen_frame_mem (DFmode,
17729 plus_constant (Pmode,
17730 stack_pointer_rtx,
17731 i * 8)),
17732 reg);
17733 RTX_FRAME_RELATED_P (tmp) = 1;
17734 XVECEXP (dwarf, 0, i + 1) = tmp;
17737 par = emit_insn (par);
17738 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17739 RTX_FRAME_RELATED_P (par) = 1;
17741 return count * 8;
17744 /* Emit a call instruction with pattern PAT. ADDR is the address of
17745 the call target. */
17747 void
17748 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17750 rtx insn;
17752 insn = emit_call_insn (pat);
17754 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17755 If the call might use such an entry, add a use of the PIC register
17756 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17757 if (TARGET_VXWORKS_RTP
17758 && flag_pic
17759 && !sibcall
17760 && GET_CODE (addr) == SYMBOL_REF
17761 && (SYMBOL_REF_DECL (addr)
17762 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17763 : !SYMBOL_REF_LOCAL_P (addr)))
17765 require_pic_register ();
17766 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17769 if (TARGET_AAPCS_BASED)
17771 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17772 linker. We need to add an IP clobber to allow setting
17773 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17774 is not needed since it's a fixed register. */
17775 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17776 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17780 /* Output a 'call' insn. */
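/* For illustration, on a pre-v5 target a call through r3 comes out as
   "mov lr, pc" followed by "bx r3" (interworking/ARMv4T) or "mov pc, r3"
   otherwise; a call through lr is first redirected via ip so the target
   is not clobbered when lr is loaded with the return address.  */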
17781 const char *
17782 output_call (rtx *operands)
17784 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17786 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17787 if (REGNO (operands[0]) == LR_REGNUM)
17789 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17790 output_asm_insn ("mov%?\t%0, %|lr", operands);
17793 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17795 if (TARGET_INTERWORK || arm_arch4t)
17796 output_asm_insn ("bx%?\t%0", operands);
17797 else
17798 output_asm_insn ("mov%?\t%|pc, %0", operands);
17800 return "";
17803 /* Output a 'call' insn that is a reference in memory. This is
17804 disabled for ARMv5, where we prefer to use blx instead, because otherwise
17805 there's a significant performance overhead. */
17806 const char *
17807 output_call_mem (rtx *operands)
17809 gcc_assert (!arm_arch5);
17810 if (TARGET_INTERWORK)
17812 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17813 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17814 output_asm_insn ("bx%?\t%|ip", operands);
17816 else if (regno_use_in (LR_REGNUM, operands[0]))
17818 /* LR is used in the memory address. We load the address in the
17819 first instruction. It's safe to use IP as the target of the
17820 load since the call will kill it anyway. */
17821 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17822 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17823 if (arm_arch4t)
17824 output_asm_insn ("bx%?\t%|ip", operands);
17825 else
17826 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17828 else
17830 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17831 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17834 return "";
17838 /* Output a move from arm registers to arm registers of a long double
17839 OPERANDS[0] is the destination.
17840 OPERANDS[1] is the source. */
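/* A hedged note on the loop direction below: when the destination starts
   at a lower register number than the source (e.g. r0-r2 <- r1-r3) the
   words are copied low to high, otherwise high to low, so no overlapping
   register is overwritten before it has been read.  */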
17841 const char *
17842 output_mov_long_double_arm_from_arm (rtx *operands)
17844 /* We have to be careful here because the two might overlap. */
17845 int dest_start = REGNO (operands[0]);
17846 int src_start = REGNO (operands[1]);
17847 rtx ops[2];
17848 int i;
17850 if (dest_start < src_start)
17852 for (i = 0; i < 3; i++)
17854 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17855 ops[1] = gen_rtx_REG (SImode, src_start + i);
17856 output_asm_insn ("mov%?\t%0, %1", ops);
17859 else
17861 for (i = 2; i >= 0; i--)
17863 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17864 ops[1] = gen_rtx_REG (SImode, src_start + i);
17865 output_asm_insn ("mov%?\t%0, %1", ops);
17869 return "";
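/* arm_emit_movpair below emits DEST = SRC as a low/high pair of sets.
   As an illustrative example (constant chosen arbitrarily), loading
   0x12345678 becomes a SET of the low half (0x5678) followed by a
   ZERO_EXTRACT write of the upper 16 bits (0x1234); the second insn is
   omitted when those bits are zero.  Non-constant sources use a
   HIGH/LO_SUM pair instead.  */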
17872 void
17873 arm_emit_movpair (rtx dest, rtx src)
17875 /* If the src is an immediate, simplify it. */
17876 if (CONST_INT_P (src))
17878 HOST_WIDE_INT val = INTVAL (src);
17879 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17880 if ((val >> 16) & 0x0000ffff)
17881 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17882 GEN_INT (16)),
17883 GEN_INT ((val >> 16) & 0x0000ffff));
17884 return;
17886 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17887 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17890 /* Output a move between double words. It must be REG<-MEM
17891 or MEM<-REG. */
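/* Illustrative examples of the strings chosen below (registers
   arbitrary): a plain register address gives "ldrd r0, [r2]", or
   "ldmia r2, {r0-r1}" when LDRD is unavailable; the pre/post-modify
   cases fall back to a pair of single LDRs when the offset is outside
   the range LDRD can encode.  */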
17892 const char *
17893 output_move_double (rtx *operands, bool emit, int *count)
17895 enum rtx_code code0 = GET_CODE (operands[0]);
17896 enum rtx_code code1 = GET_CODE (operands[1]);
17897 rtx otherops[3];
17898 if (count)
17899 *count = 1;
17901 /* The only case when this might happen is when
17902 you are looking at the length of a DImode instruction
17903 that has an invalid constant in it. */
17904 if (code0 == REG && code1 != MEM)
17906 gcc_assert (!emit);
17907 *count = 2;
17908 return "";
17911 if (code0 == REG)
17913 unsigned int reg0 = REGNO (operands[0]);
17915 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17917 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17919 switch (GET_CODE (XEXP (operands[1], 0)))
17921 case REG:
17923 if (emit)
17925 if (TARGET_LDRD
17926 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17927 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17928 else
17929 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17931 break;
17933 case PRE_INC:
17934 gcc_assert (TARGET_LDRD);
17935 if (emit)
17936 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17937 break;
17939 case PRE_DEC:
17940 if (emit)
17942 if (TARGET_LDRD)
17943 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17944 else
17945 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17947 break;
17949 case POST_INC:
17950 if (emit)
17952 if (TARGET_LDRD)
17953 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17954 else
17955 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17957 break;
17959 case POST_DEC:
17960 gcc_assert (TARGET_LDRD);
17961 if (emit)
17962 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17963 break;
17965 case PRE_MODIFY:
17966 case POST_MODIFY:
17967 /* Autoincrement addressing modes should never have overlapping
17968 base and destination registers, and overlapping index registers
17969 are already prohibited, so this doesn't need to worry about
17970 fix_cm3_ldrd. */
17971 otherops[0] = operands[0];
17972 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17973 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17975 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17977 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17979 /* Registers overlap so split out the increment. */
17980 if (emit)
17982 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17983 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17985 if (count)
17986 *count = 2;
17988 else
17990 /* Use a single insn if we can.
17991 FIXME: IWMMXT allows offsets larger than ldrd can
17992 handle, fix these up with a pair of ldr. */
17993 if (TARGET_THUMB2
17994 || !CONST_INT_P (otherops[2])
17995 || (INTVAL (otherops[2]) > -256
17996 && INTVAL (otherops[2]) < 256))
17998 if (emit)
17999 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18001 else
18003 if (emit)
18005 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18006 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18008 if (count)
18009 *count = 2;
18014 else
18016 /* Use a single insn if we can.
18017 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18018 fix these up with a pair of ldr. */
18019 if (TARGET_THUMB2
18020 || !CONST_INT_P (otherops[2])
18021 || (INTVAL (otherops[2]) > -256
18022 && INTVAL (otherops[2]) < 256))
18024 if (emit)
18025 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18027 else
18029 if (emit)
18031 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18032 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18034 if (count)
18035 *count = 2;
18038 break;
18040 case LABEL_REF:
18041 case CONST:
18042 /* We might be able to use ldrd %0, %1 here. However the range is
18043 different to ldr/adr, and it is broken on some ARMv7-M
18044 implementations. */
18045 /* Use the second register of the pair to avoid problematic
18046 overlap. */
18047 otherops[1] = operands[1];
18048 if (emit)
18049 output_asm_insn ("adr%?\t%0, %1", otherops);
18050 operands[1] = otherops[0];
18051 if (emit)
18053 if (TARGET_LDRD)
18054 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18055 else
18056 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18059 if (count)
18060 *count = 2;
18061 break;
18063 /* ??? This needs checking for thumb2. */
18064 default:
18065 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18066 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18068 otherops[0] = operands[0];
18069 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18070 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18072 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18074 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18076 switch ((int) INTVAL (otherops[2]))
18078 case -8:
18079 if (emit)
18080 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18081 return "";
18082 case -4:
18083 if (TARGET_THUMB2)
18084 break;
18085 if (emit)
18086 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18087 return "";
18088 case 4:
18089 if (TARGET_THUMB2)
18090 break;
18091 if (emit)
18092 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18093 return "";
18096 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18097 operands[1] = otherops[0];
18098 if (TARGET_LDRD
18099 && (REG_P (otherops[2])
18100 || TARGET_THUMB2
18101 || (CONST_INT_P (otherops[2])
18102 && INTVAL (otherops[2]) > -256
18103 && INTVAL (otherops[2]) < 256)))
18105 if (reg_overlap_mentioned_p (operands[0],
18106 otherops[2]))
18108 /* Swap base and index registers over to
18109 avoid a conflict. */
18110 std::swap (otherops[1], otherops[2]);
18112 /* If both registers conflict, it will usually
18113 have been fixed by a splitter. */
18114 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18115 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18117 if (emit)
18119 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18120 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18122 if (count)
18123 *count = 2;
18125 else
18127 otherops[0] = operands[0];
18128 if (emit)
18129 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18131 return "";
18134 if (CONST_INT_P (otherops[2]))
18136 if (emit)
18138 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18139 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18140 else
18141 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 else
18146 if (emit)
18147 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18150 else
18152 if (emit)
18153 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18156 if (count)
18157 *count = 2;
18159 if (TARGET_LDRD)
18160 return "ldr%(d%)\t%0, [%1]";
18162 return "ldm%(ia%)\t%1, %M0";
18164 else
18166 otherops[1] = adjust_address (operands[1], SImode, 4);
18167 /* Take care of overlapping base/data reg. */
18168 if (reg_mentioned_p (operands[0], operands[1]))
18170 if (emit)
18172 output_asm_insn ("ldr%?\t%0, %1", otherops);
18173 output_asm_insn ("ldr%?\t%0, %1", operands);
18175 if (count)
18176 *count = 2;
18179 else
18181 if (emit)
18183 output_asm_insn ("ldr%?\t%0, %1", operands);
18184 output_asm_insn ("ldr%?\t%0, %1", otherops);
18186 if (count)
18187 *count = 2;
18192 else
18194 /* Constraints should ensure this. */
18195 gcc_assert (code0 == MEM && code1 == REG);
18196 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18197 || (TARGET_ARM && TARGET_LDRD));
18199 switch (GET_CODE (XEXP (operands[0], 0)))
18201 case REG:
18202 if (emit)
18204 if (TARGET_LDRD)
18205 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18206 else
18207 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18209 break;
18211 case PRE_INC:
18212 gcc_assert (TARGET_LDRD);
18213 if (emit)
18214 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18215 break;
18217 case PRE_DEC:
18218 if (emit)
18220 if (TARGET_LDRD)
18221 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18222 else
18223 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18225 break;
18227 case POST_INC:
18228 if (emit)
18230 if (TARGET_LDRD)
18231 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18232 else
18233 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18235 break;
18237 case POST_DEC:
18238 gcc_assert (TARGET_LDRD);
18239 if (emit)
18240 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18241 break;
18243 case PRE_MODIFY:
18244 case POST_MODIFY:
18245 otherops[0] = operands[1];
18246 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18247 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18249 /* IWMMXT allows offsets larger than ldrd can handle,
18250 fix these up with a pair of ldr. */
18251 if (!TARGET_THUMB2
18252 && CONST_INT_P (otherops[2])
18253 && (INTVAL(otherops[2]) <= -256
18254 || INTVAL(otherops[2]) >= 256))
18256 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18258 if (emit)
18260 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18261 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18263 if (count)
18264 *count = 2;
18266 else
18268 if (emit)
18270 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18271 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18273 if (count)
18274 *count = 2;
18277 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18279 if (emit)
18280 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18282 else
18284 if (emit)
18285 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18287 break;
18289 case PLUS:
18290 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18291 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18293 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18295 case -8:
18296 if (emit)
18297 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18298 return "";
18300 case -4:
18301 if (TARGET_THUMB2)
18302 break;
18303 if (emit)
18304 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18305 return "";
18307 case 4:
18308 if (TARGET_THUMB2)
18309 break;
18310 if (emit)
18311 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18312 return "";
18315 if (TARGET_LDRD
18316 && (REG_P (otherops[2])
18317 || TARGET_THUMB2
18318 || (CONST_INT_P (otherops[2])
18319 && INTVAL (otherops[2]) > -256
18320 && INTVAL (otherops[2]) < 256)))
18322 otherops[0] = operands[1];
18323 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18324 if (emit)
18325 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18326 return "";
18328 /* Fall through */
18330 default:
18331 otherops[0] = adjust_address (operands[0], SImode, 4);
18332 otherops[1] = operands[1];
18333 if (emit)
18335 output_asm_insn ("str%?\t%1, %0", operands);
18336 output_asm_insn ("str%?\t%H1, %0", otherops);
18338 if (count)
18339 *count = 2;
18343 return "";
18346 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18347 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
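/* For example (registers arbitrary), a quad-word load from a register
   address is emitted as "ldmia r4, {r0-r3}", and a register-to-register
   move expands into four MOVs ordered so that overlapping source and
   destination ranges are not corrupted.  */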
18349 const char *
18350 output_move_quad (rtx *operands)
18352 if (REG_P (operands[0]))
18354 /* Load, or reg->reg move. */
18356 if (MEM_P (operands[1]))
18358 switch (GET_CODE (XEXP (operands[1], 0)))
18360 case REG:
18361 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18362 break;
18364 case LABEL_REF:
18365 case CONST:
18366 output_asm_insn ("adr%?\t%0, %1", operands);
18367 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18368 break;
18370 default:
18371 gcc_unreachable ();
18374 else
18376 rtx ops[2];
18377 int dest, src, i;
18379 gcc_assert (REG_P (operands[1]));
18381 dest = REGNO (operands[0]);
18382 src = REGNO (operands[1]);
18384 /* This seems pretty dumb, but hopefully GCC won't try to do it
18385 very often. */
18386 if (dest < src)
18387 for (i = 0; i < 4; i++)
18389 ops[0] = gen_rtx_REG (SImode, dest + i);
18390 ops[1] = gen_rtx_REG (SImode, src + i);
18391 output_asm_insn ("mov%?\t%0, %1", ops);
18393 else
18394 for (i = 3; i >= 0; i--)
18396 ops[0] = gen_rtx_REG (SImode, dest + i);
18397 ops[1] = gen_rtx_REG (SImode, src + i);
18398 output_asm_insn ("mov%?\t%0, %1", ops);
18402 else
18404 gcc_assert (MEM_P (operands[0]));
18405 gcc_assert (REG_P (operands[1]));
18406 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18408 switch (GET_CODE (XEXP (operands[0], 0)))
18410 case REG:
18411 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18412 break;
18414 default:
18415 gcc_unreachable ();
18419 return "";
18422 /* Output a VFP load or store instruction. */
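/* A sketch of the template expansion below (registers arbitrary): a
   DFmode load from a plain address becomes "vldr.64 d8, [r0]", a store
   with post-increment becomes "vstmia.64 r3!, {d8}", and SFmode/SImode
   operands use the ".32" forms on an S register.  */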
18424 const char *
18425 output_move_vfp (rtx *operands)
18427 rtx reg, mem, addr, ops[2];
18428 int load = REG_P (operands[0]);
18429 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18430 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18431 const char *templ;
18432 char buff[50];
18433 machine_mode mode;
18435 reg = operands[!load];
18436 mem = operands[load];
18438 mode = GET_MODE (reg);
18440 gcc_assert (REG_P (reg));
18441 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18442 gcc_assert (mode == SFmode
18443 || mode == DFmode
18444 || mode == SImode
18445 || mode == DImode
18446 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18447 gcc_assert (MEM_P (mem));
18449 addr = XEXP (mem, 0);
18451 switch (GET_CODE (addr))
18453 case PRE_DEC:
18454 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18455 ops[0] = XEXP (addr, 0);
18456 ops[1] = reg;
18457 break;
18459 case POST_INC:
18460 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18461 ops[0] = XEXP (addr, 0);
18462 ops[1] = reg;
18463 break;
18465 default:
18466 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18467 ops[0] = reg;
18468 ops[1] = mem;
18469 break;
18472 sprintf (buff, templ,
18473 load ? "ld" : "st",
18474 dp ? "64" : "32",
18475 dp ? "P" : "",
18476 integer_p ? "\t%@ int" : "");
18477 output_asm_insn (buff, ops);
18479 return "";
18482 /* Output a Neon double-word or quad-word load or store, or a load
18483 or store for larger structure modes.
18485 WARNING: The ordering of elements is weird in big-endian mode,
18486 because the EABI requires that vectors stored in memory appear
18487 as though they were stored by a VSTM instruction.
18488 GCC RTL defines element ordering based on in-memory order.
18489 This can be different from the architectural ordering of elements
18490 within a NEON register. The intrinsics defined in arm_neon.h use the
18491 NEON register element ordering, not the GCC RTL element ordering.
18493 For example, the in-memory ordering of a big-endian quadword
18494 vector with 16-bit elements when stored from register pair {d0,d1}
18495 will be (lowest address first, d0[N] is NEON register element N):
18497 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18499 When necessary, quadword registers (dN, dN+1) are moved to ARM
18500 registers from rN in the order:
18502 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18504 So that STM/LDM can be used on vectors in ARM registers, and the
18505 same memory layout will result as if VSTM/VLDM were used.
18507 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18508 possible, which allows use of appropriate alignment tags.
18509 Note that the choice of "64" is independent of the actual vector
18510 element size; this size simply ensures that the behavior is
18511 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18513 Due to limitations of those instructions, use of VST1.64/VLD1.64
18514 is not possible if:
18515 - the address contains PRE_DEC, or
18516 - the mode refers to more than 4 double-word registers
18518 In those cases, it would be possible to replace VSTM/VLDM by a
18519 sequence of instructions; this is not currently implemented since
18520 this is not certain to actually improve performance. */
18522 const char *
18523 output_move_neon (rtx *operands)
18525 rtx reg, mem, addr, ops[2];
18526 int regno, nregs, load = REG_P (operands[0]);
18527 const char *templ;
18528 char buff[50];
18529 machine_mode mode;
18531 reg = operands[!load];
18532 mem = operands[load];
18534 mode = GET_MODE (reg);
18536 gcc_assert (REG_P (reg));
18537 regno = REGNO (reg);
18538 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18539 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18540 || NEON_REGNO_OK_FOR_QUAD (regno));
18541 gcc_assert (VALID_NEON_DREG_MODE (mode)
18542 || VALID_NEON_QREG_MODE (mode)
18543 || VALID_NEON_STRUCT_MODE (mode));
18544 gcc_assert (MEM_P (mem));
18546 addr = XEXP (mem, 0);
18548 /* Strip off const from addresses like (const (plus (...))). */
18549 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18550 addr = XEXP (addr, 0);
18552 switch (GET_CODE (addr))
18554 case POST_INC:
18555 /* We have to use vldm / vstm for too-large modes. */
18556 if (nregs > 4)
18558 templ = "v%smia%%?\t%%0!, %%h1";
18559 ops[0] = XEXP (addr, 0);
18561 else
18563 templ = "v%s1.64\t%%h1, %%A0";
18564 ops[0] = mem;
18566 ops[1] = reg;
18567 break;
18569 case PRE_DEC:
18570 /* We have to use vldm / vstm in this case, since there is no
18571 pre-decrement form of the vld1 / vst1 instructions. */
18572 templ = "v%smdb%%?\t%%0!, %%h1";
18573 ops[0] = XEXP (addr, 0);
18574 ops[1] = reg;
18575 break;
18577 case POST_MODIFY:
18578 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18579 gcc_unreachable ();
18581 case REG:
18582 /* We have to use vldm / vstm for too-large modes. */
18583 if (nregs > 1)
18585 if (nregs > 4)
18586 templ = "v%smia%%?\t%%m0, %%h1";
18587 else
18588 templ = "v%s1.64\t%%h1, %%A0";
18590 ops[0] = mem;
18591 ops[1] = reg;
18592 break;
18594 /* Fall through. */
18595 case LABEL_REF:
18596 case PLUS:
18598 int i;
18599 int overlap = -1;
18600 for (i = 0; i < nregs; i++)
18602 /* We're only using DImode here because it's a convenient size. */
18603 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18604 ops[1] = adjust_address (mem, DImode, 8 * i);
18605 if (reg_overlap_mentioned_p (ops[0], mem))
18607 gcc_assert (overlap == -1);
18608 overlap = i;
18610 else
18612 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18613 output_asm_insn (buff, ops);
18616 if (overlap != -1)
18618 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18619 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18620 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18621 output_asm_insn (buff, ops);
18624 return "";
18627 default:
18628 gcc_unreachable ();
18631 sprintf (buff, templ, load ? "ld" : "st");
18632 output_asm_insn (buff, ops);
18634 return "";
18637 /* Compute and return the length of neon_mov<mode>, where <mode> is
18638 one of VSTRUCT modes: EI, OI, CI or XI. */
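/* For instance, an OImode register-to-register move is counted as two
   instructions (8 bytes) and XImode as four (16 bytes); loads or stores
   whose address is a PLUS or LABEL_REF are counted at one 4-byte VLDR/VSTR
   per D register, and all other addresses as a single 4-byte insn.  */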
18640 arm_attr_length_move_neon (rtx_insn *insn)
18642 rtx reg, mem, addr;
18643 int load;
18644 machine_mode mode;
18646 extract_insn_cached (insn);
18648 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18650 mode = GET_MODE (recog_data.operand[0]);
18651 switch (mode)
18653 case EImode:
18654 case OImode:
18655 return 8;
18656 case CImode:
18657 return 12;
18658 case XImode:
18659 return 16;
18660 default:
18661 gcc_unreachable ();
18665 load = REG_P (recog_data.operand[0]);
18666 reg = recog_data.operand[!load];
18667 mem = recog_data.operand[load];
18669 gcc_assert (MEM_P (mem));
18671 mode = GET_MODE (reg);
18672 addr = XEXP (mem, 0);
18674 /* Strip off const from addresses like (const (plus (...))). */
18675 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18676 addr = XEXP (addr, 0);
18678 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18680 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18681 return insns * 4;
18683 else
18684 return 4;
18687 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18688 return zero. */
18691 arm_address_offset_is_imm (rtx_insn *insn)
18693 rtx mem, addr;
18695 extract_insn_cached (insn);
18697 if (REG_P (recog_data.operand[0]))
18698 return 0;
18700 mem = recog_data.operand[0];
18702 gcc_assert (MEM_P (mem));
18704 addr = XEXP (mem, 0);
18706 if (REG_P (addr)
18707 || (GET_CODE (addr) == PLUS
18708 && REG_P (XEXP (addr, 0))
18709 && CONST_INT_P (XEXP (addr, 1))))
18710 return 1;
18711 else
18712 return 0;
18715 /* Output an ADD r, s, #n where n may be too big for one instruction.
18716 If adding zero to one register, output nothing. */
18717 const char *
18718 output_add_immediate (rtx *operands)
18720 HOST_WIDE_INT n = INTVAL (operands[2]);
18722 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18724 if (n < 0)
18725 output_multi_immediate (operands,
18726 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18727 -n);
18728 else
18729 output_multi_immediate (operands,
18730 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18734 return "";
18737 /* Output a multiple immediate operation.
18738 OPERANDS is the vector of operands referred to in the output patterns.
18739 INSTR1 is the output pattern to use for the first constant.
18740 INSTR2 is the output pattern to use for subsequent constants.
18741 IMMED_OP is the index of the constant slot in OPERANDS.
18742 N is the constant value. */
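/* A worked example (register names arbitrary): adding the constant
   0x10004 via output_add_immediate comes out as "add r0, r1, #4"
   followed by "add r0, r0, #65536", since each emitted chunk must be an
   8-bit value positioned at an even bit offset, as the loop below
   enforces.  */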
18743 static const char *
18744 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18745 int immed_op, HOST_WIDE_INT n)
18747 #if HOST_BITS_PER_WIDE_INT > 32
18748 n &= 0xffffffff;
18749 #endif
18751 if (n == 0)
18753 /* Quick and easy output. */
18754 operands[immed_op] = const0_rtx;
18755 output_asm_insn (instr1, operands);
18757 else
18759 int i;
18760 const char * instr = instr1;
18762 /* Note that n is never zero here (which would give no output). */
18763 for (i = 0; i < 32; i += 2)
18765 if (n & (3 << i))
18767 operands[immed_op] = GEN_INT (n & (255 << i));
18768 output_asm_insn (instr, operands);
18769 instr = instr2;
18770 i += 6;
18775 return "";
18778 /* Return the name of a shifter operation. */
18779 static const char *
18780 arm_shift_nmem(enum rtx_code code)
18782 switch (code)
18784 case ASHIFT:
18785 return ARM_LSL_NAME;
18787 case ASHIFTRT:
18788 return "asr";
18790 case LSHIFTRT:
18791 return "lsr";
18793 case ROTATERT:
18794 return "ror";
18796 default:
18797 abort();
18801 /* Return the appropriate ARM instruction for the operation code.
18802 The returned result should not be overwritten. OP is the rtx of the
18803 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18804 was shifted. */
18805 const char *
18806 arithmetic_instr (rtx op, int shift_first_arg)
18808 switch (GET_CODE (op))
18810 case PLUS:
18811 return "add";
18813 case MINUS:
18814 return shift_first_arg ? "rsb" : "sub";
18816 case IOR:
18817 return "orr";
18819 case XOR:
18820 return "eor";
18822 case AND:
18823 return "and";
18825 case ASHIFT:
18826 case ASHIFTRT:
18827 case LSHIFTRT:
18828 case ROTATERT:
18829 return arm_shift_nmem(GET_CODE(op));
18831 default:
18832 gcc_unreachable ();
18836 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18837 for the operation code. The returned result should not be overwritten.
18838 OP is the rtx code of the shift.
18839 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18840 constant shift amount otherwise. */
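/* Hedged examples of the mapping below: (ashift reg (const_int 3)) yields
   "lsl" with *AMOUNTP set to 3, (mult reg (const_int 8)) is handled the
   same way via int_log2, and (rotate reg (const_int 8)) is rewritten as
   "ror" by 24.  */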
18841 static const char *
18842 shift_op (rtx op, HOST_WIDE_INT *amountp)
18844 const char * mnem;
18845 enum rtx_code code = GET_CODE (op);
18847 switch (code)
18849 case ROTATE:
18850 if (!CONST_INT_P (XEXP (op, 1)))
18852 output_operand_lossage ("invalid shift operand");
18853 return NULL;
18856 code = ROTATERT;
18857 *amountp = 32 - INTVAL (XEXP (op, 1));
18858 mnem = "ror";
18859 break;
18861 case ASHIFT:
18862 case ASHIFTRT:
18863 case LSHIFTRT:
18864 case ROTATERT:
18865 mnem = arm_shift_nmem(code);
18866 if (CONST_INT_P (XEXP (op, 1)))
18868 *amountp = INTVAL (XEXP (op, 1));
18870 else if (REG_P (XEXP (op, 1)))
18872 *amountp = -1;
18873 return mnem;
18875 else
18877 output_operand_lossage ("invalid shift operand");
18878 return NULL;
18880 break;
18882 case MULT:
18883 /* We never have to worry about the amount being other than a
18884 power of 2, since this case can never be reloaded from a reg. */
18885 if (!CONST_INT_P (XEXP (op, 1)))
18887 output_operand_lossage ("invalid shift operand");
18888 return NULL;
18891 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18893 /* Amount must be a power of two. */
18894 if (*amountp & (*amountp - 1))
18896 output_operand_lossage ("invalid shift operand");
18897 return NULL;
18900 *amountp = int_log2 (*amountp);
18901 return ARM_LSL_NAME;
18903 default:
18904 output_operand_lossage ("invalid shift operand");
18905 return NULL;
18908 /* This is not 100% correct, but follows from the desire to merge
18909 multiplication by a power of 2 with the recognizer for a
18910 shift. >=32 is not a valid shift for "lsl", so we must try and
18911 output a shift that produces the correct arithmetical result.
18912 Using lsr #32 is identical except for the fact that the carry bit
18913 is not set correctly if we set the flags; but we never use the
18914 carry bit from such an operation, so we can ignore that. */
18915 if (code == ROTATERT)
18916 /* Rotate is just modulo 32. */
18917 *amountp &= 31;
18918 else if (*amountp != (*amountp & 31))
18920 if (code == ASHIFT)
18921 mnem = "lsr";
18922 *amountp = 32;
18925 /* Shifts of 0 are no-ops. */
18926 if (*amountp == 0)
18927 return NULL;
18929 return mnem;
18932 /* Obtain the shift from the POWER of two. */
18934 static HOST_WIDE_INT
18935 int_log2 (HOST_WIDE_INT power)
18937 HOST_WIDE_INT shift = 0;
18939 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18941 gcc_assert (shift <= 31);
18942 shift++;
18945 return shift;
18948 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18949 because /bin/as is horribly restrictive. The judgement about
18950 whether or not each character is 'printable' (and can be output as
18951 is) or not (and must be printed with an octal escape) must be made
18952 with reference to the *host* character set -- the situation is
18953 similar to that discussed in the comments above pp_c_char in
18954 c-pretty-print.c. */
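/* For example, the bytes 'h', 'i', '"', 0x07 are emitted as
   .ascii "hi\"\007"
   and a fresh .ascii directive is started whenever a line reaches
   MAX_ASCII_LEN characters.  */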
18956 #define MAX_ASCII_LEN 51
18958 void
18959 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18961 int i;
18962 int len_so_far = 0;
18964 fputs ("\t.ascii\t\"", stream);
18966 for (i = 0; i < len; i++)
18968 int c = p[i];
18970 if (len_so_far >= MAX_ASCII_LEN)
18972 fputs ("\"\n\t.ascii\t\"", stream);
18973 len_so_far = 0;
18976 if (ISPRINT (c))
18978 if (c == '\\' || c == '\"')
18980 putc ('\\', stream);
18981 len_so_far++;
18983 putc (c, stream);
18984 len_so_far++;
18986 else
18988 fprintf (stream, "\\%03o", c);
18989 len_so_far += 4;
18993 fputs ("\"\n", stream);
18996 /* Compute the register save mask for registers 0 through 12
18997 inclusive. This code is used by arm_compute_save_reg_mask. */
18999 static unsigned long
19000 arm_compute_save_reg0_reg12_mask (void)
19002 unsigned long func_type = arm_current_func_type ();
19003 unsigned long save_reg_mask = 0;
19004 unsigned int reg;
19006 if (IS_INTERRUPT (func_type))
19008 unsigned int max_reg;
19009 /* Interrupt functions must not corrupt any registers,
19010 even call clobbered ones. If this is a leaf function
19011 we can just examine the registers used by the RTL, but
19012 otherwise we have to assume that whatever function is
19013 called might clobber anything, and so we have to save
19014 all the call-clobbered registers as well. */
19015 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19016 /* FIQ handlers have registers r8 - r12 banked, so
19017 we only need to check r0 - r7. Normal ISRs only
19018 bank r14 and r15, so we must check up to r12.
19019 r13 is the stack pointer which is always preserved,
19020 so we do not need to consider it here. */
19021 max_reg = 7;
19022 else
19023 max_reg = 12;
19025 for (reg = 0; reg <= max_reg; reg++)
19026 if (df_regs_ever_live_p (reg)
19027 || (! crtl->is_leaf && call_used_regs[reg]))
19028 save_reg_mask |= (1 << reg);
19030 /* Also save the pic base register if necessary. */
19031 if (flag_pic
19032 && !TARGET_SINGLE_PIC_BASE
19033 && arm_pic_register != INVALID_REGNUM
19034 && crtl->uses_pic_offset_table)
19035 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19037 else if (IS_VOLATILE(func_type))
19039 /* For noreturn functions we historically omitted register saves
19040 altogether. However this really messes up debugging. As a
19041 compromise save just the frame pointers. Combined with the link
19042 register saved elsewhere this should be sufficient to get
19043 a backtrace. */
19044 if (frame_pointer_needed)
19045 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19046 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19047 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19051 else
19053 /* In the normal case we only need to save those registers
19054 which are call saved and which are used by this function. */
19055 for (reg = 0; reg <= 11; reg++)
19056 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19057 save_reg_mask |= (1 << reg);
19059 /* Handle the frame pointer as a special case. */
19060 if (frame_pointer_needed)
19061 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19063 /* If we aren't loading the PIC register,
19064 don't stack it even though it may be live. */
19065 if (flag_pic
19066 && !TARGET_SINGLE_PIC_BASE
19067 && arm_pic_register != INVALID_REGNUM
19068 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19069 || crtl->uses_pic_offset_table))
19070 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19072 /* The prologue will copy SP into R0, so save it. */
19073 if (IS_STACKALIGN (func_type))
19074 save_reg_mask |= 1;
19077 /* Save registers so the exception handler can modify them. */
19078 if (crtl->calls_eh_return)
19080 unsigned int i;
19082 for (i = 0; ; i++)
19084 reg = EH_RETURN_DATA_REGNO (i);
19085 if (reg == INVALID_REGNUM)
19086 break;
19087 save_reg_mask |= 1 << reg;
19091 return save_reg_mask;
19094 /* Return true if r3 is live at the start of the function. */
19096 static bool
19097 arm_r3_live_at_start_p (void)
19099 /* Just look at cfg info, which is still close enough to correct at this
19100 point. This gives false positives for broken functions that might use
19101 uninitialized data that happens to be allocated in r3, but who cares? */
19102 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19105 /* Compute the number of bytes used to store the static chain register on the
19106 stack, above the stack frame. We need to know this accurately to get the
19107 alignment of the rest of the stack frame correct. */
19109 static int
19110 arm_compute_static_chain_stack_bytes (void)
19112 /* See the defining assertion in arm_expand_prologue. */
19113 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19114 && IS_NESTED (arm_current_func_type ())
19115 && arm_r3_live_at_start_p ()
19116 && crtl->args.pretend_args_size == 0)
19117 return 4;
19119 return 0;
19122 /* Compute a bit mask of which registers need to be
19123 saved on the stack for the current function.
19124 This is used by arm_get_frame_offsets, which may add extra registers. */
19126 static unsigned long
19127 arm_compute_save_reg_mask (void)
19129 unsigned int save_reg_mask = 0;
19130 unsigned long func_type = arm_current_func_type ();
19131 unsigned int reg;
19133 if (IS_NAKED (func_type))
19134 /* This should never really happen. */
19135 return 0;
19137 /* If we are creating a stack frame, then we must save the frame pointer,
19138 IP (which will hold the old stack pointer), LR and the PC. */
19139 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19140 save_reg_mask |=
19141 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19142 | (1 << IP_REGNUM)
19143 | (1 << LR_REGNUM)
19144 | (1 << PC_REGNUM);
19146 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19148 /* Decide if we need to save the link register.
19149 Interrupt routines have their own banked link register,
19150 so they never need to save it.
19151 Otherwise if we do not use the link register we do not need to save
19152 it. If we are pushing other registers onto the stack however, we
19153 can save an instruction in the epilogue by pushing the link register
19154 now and then popping it back into the PC. This incurs extra memory
19155 accesses though, so we only do it when optimizing for size, and only
19156 if we know that we will not need a fancy return sequence. */
19157 if (df_regs_ever_live_p (LR_REGNUM)
19158 || (save_reg_mask
19159 && optimize_size
19160 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19161 && !crtl->calls_eh_return))
19162 save_reg_mask |= 1 << LR_REGNUM;
19164 if (cfun->machine->lr_save_eliminated)
19165 save_reg_mask &= ~ (1 << LR_REGNUM);
19167 if (TARGET_REALLY_IWMMXT
19168 && ((bit_count (save_reg_mask)
19169 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19170 arm_compute_static_chain_stack_bytes())
19171 ) % 2) != 0)
19173 /* The total number of registers that are going to be pushed
19174 onto the stack is odd. We need to ensure that the stack
19175 is 64-bit aligned before we start to save iWMMXt registers,
19176 and also before we start to create locals. (A local variable
19177 might be a double or long long which we will load/store using
19178 an iWMMXt instruction). Therefore we need to push another
19179 ARM register, so that the stack will be 64-bit aligned. We
19180 try to avoid using the arg registers (r0 -r3) as they might be
19181 used to pass values in a tail call. */
19182 for (reg = 4; reg <= 12; reg++)
19183 if ((save_reg_mask & (1 << reg)) == 0)
19184 break;
19186 if (reg <= 12)
19187 save_reg_mask |= (1 << reg);
19188 else
19190 cfun->machine->sibcall_blocked = 1;
19191 save_reg_mask |= (1 << 3);
19195 /* We may need to push an additional register for use initializing the
19196 PIC base register. */
19197 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19198 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19200 reg = thumb_find_work_register (1 << 4);
19201 if (!call_used_regs[reg])
19202 save_reg_mask |= (1 << reg);
19205 return save_reg_mask;
19209 /* Compute a bit mask of which registers need to be
19210 saved on the stack for the current function. */
19211 static unsigned long
19212 thumb1_compute_save_reg_mask (void)
19214 unsigned long mask;
19215 unsigned reg;
19217 mask = 0;
19218 for (reg = 0; reg < 12; reg ++)
19219 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19220 mask |= 1 << reg;
19222 if (flag_pic
19223 && !TARGET_SINGLE_PIC_BASE
19224 && arm_pic_register != INVALID_REGNUM
19225 && crtl->uses_pic_offset_table)
19226 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19228 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19229 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19230 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19232 /* LR will also be pushed if any lo regs are pushed. */
19233 if (mask & 0xff || thumb_force_lr_save ())
19234 mask |= (1 << LR_REGNUM);
19236 /* Make sure we have a low work register if we need one.
19237 We will need one if we are going to push a high register,
19238 but we are not currently intending to push a low register. */
19239 if ((mask & 0xff) == 0
19240 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19242 /* Use thumb_find_work_register to choose which register
19243 we will use. If the register is live then we will
19244 have to push it. Use LAST_LO_REGNUM as our fallback
19245 choice for the register to select. */
19246 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19247 /* Make sure the register returned by thumb_find_work_register is
19248 not part of the return value. */
19249 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19250 reg = LAST_LO_REGNUM;
19252 if (! call_used_regs[reg])
19253 mask |= 1 << reg;
19256 /* The 504 below is 8 bytes less than 512 because there are two possible
19257 alignment words. We can't tell here if they will be present or not so we
19258 have to play it safe and assume that they are. */
19259 if ((CALLER_INTERWORKING_SLOT_SIZE +
19260 ROUND_UP_WORD (get_frame_size ()) +
19261 crtl->outgoing_args_size) >= 504)
19263 /* This is the same as the code in thumb1_expand_prologue() which
19264 determines which register to use for stack decrement. */
19265 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19266 if (mask & (1 << reg))
19267 break;
19269 if (reg > LAST_LO_REGNUM)
19271 /* Make sure we have a register available for stack decrement. */
19272 mask |= 1 << LAST_LO_REGNUM;
19276 return mask;
19280 /* Return the number of bytes required to save VFP registers. */
19281 static int
19282 arm_get_vfp_saved_size (void)
19284 unsigned int regno;
19285 int count;
19286 int saved;
19288 saved = 0;
19289 /* Space for saved VFP registers. */
19290 if (TARGET_HARD_FLOAT && TARGET_VFP)
19292 count = 0;
19293 for (regno = FIRST_VFP_REGNUM;
19294 regno < LAST_VFP_REGNUM;
19295 regno += 2)
19297 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19298 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19300 if (count > 0)
19302 /* Work around the ARM10 VFPr1 bug. */
19303 if (count == 2 && !arm_arch6)
19304 count++;
19305 saved += count * 8;
19307 count = 0;
19309 else
19310 count++;
19312 if (count > 0)
19314 if (count == 2 && !arm_arch6)
19315 count++;
19316 saved += count * 8;
19319 return saved;
19323 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19324 everything bar the final return instruction. If simple_return is true,
19325 then do not output the epilogue, because it has already been emitted in RTL. */
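/* Illustrative outputs from the code below (registers arbitrary): a
   normal function that saved r4 and lr returns with "ldmfd sp!, {r4, pc}"
   (or "pop {r4, pc}" under unified syntax), an interworked return uses
   "bx lr", and ISR/FIQ exits use "subs pc, lr, #4" so that the CPSR is
   restored as well.  */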
19326 const char *
19327 output_return_instruction (rtx operand, bool really_return, bool reverse,
19328 bool simple_return)
19330 char conditional[10];
19331 char instr[100];
19332 unsigned reg;
19333 unsigned long live_regs_mask;
19334 unsigned long func_type;
19335 arm_stack_offsets *offsets;
19337 func_type = arm_current_func_type ();
19339 if (IS_NAKED (func_type))
19340 return "";
19342 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19344 /* If this function was declared non-returning, and we have
19345 found a tail call, then we have to trust that the called
19346 function won't return. */
19347 if (really_return)
19349 rtx ops[2];
19351 /* Otherwise, trap an attempted return by aborting. */
19352 ops[0] = operand;
19353 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19354 : "abort");
19355 assemble_external_libcall (ops[1]);
19356 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19359 return "";
19362 gcc_assert (!cfun->calls_alloca || really_return);
19364 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19366 cfun->machine->return_used_this_function = 1;
19368 offsets = arm_get_frame_offsets ();
19369 live_regs_mask = offsets->saved_regs_mask;
19371 if (!simple_return && live_regs_mask)
19373 const char * return_reg;
19375 /* If we do not have any special requirements for function exit
19376 (e.g. interworking) then we can load the return address
19377 directly into the PC. Otherwise we must load it into LR. */
19378 if (really_return
19379 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19380 return_reg = reg_names[PC_REGNUM];
19381 else
19382 return_reg = reg_names[LR_REGNUM];
19384 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19386 /* There are three possible reasons for the IP register
19387 being saved. 1) a stack frame was created, in which case
19388 IP contains the old stack pointer, or 2) an ISR routine
19389 corrupted it, or 3) it was saved to align the stack on
19390 iWMMXt. In case 1, restore IP into SP, otherwise just
19391 restore IP. */
19392 if (frame_pointer_needed)
19394 live_regs_mask &= ~ (1 << IP_REGNUM);
19395 live_regs_mask |= (1 << SP_REGNUM);
19397 else
19398 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19401 /* On some ARM architectures it is faster to use LDR rather than
19402 LDM to load a single register. On other architectures, the
19403 cost is the same. In 26 bit mode, or for exception handlers,
19404 we have to use LDM to load the PC so that the CPSR is also
19405 restored. */
19406 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19407 if (live_regs_mask == (1U << reg))
19408 break;
19410 if (reg <= LAST_ARM_REGNUM
19411 && (reg != LR_REGNUM
19412 || ! really_return
19413 || ! IS_INTERRUPT (func_type)))
19415 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19416 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19418 else
19420 char *p;
19421 int first = 1;
19423 /* Generate the load multiple instruction to restore the
19424 registers. Note we can get here, even if
19425 frame_pointer_needed is true, but only if sp already
19426 points to the base of the saved core registers. */
19427 if (live_regs_mask & (1 << SP_REGNUM))
19429 unsigned HOST_WIDE_INT stack_adjust;
19431 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19432 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19434 if (stack_adjust && arm_arch5 && TARGET_ARM)
19435 if (TARGET_UNIFIED_ASM)
19436 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19437 else
19438 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19439 else
19441 /* If we can't use ldmib (SA110 bug),
19442 then try to pop r3 instead. */
19443 if (stack_adjust)
19444 live_regs_mask |= 1 << 3;
19446 if (TARGET_UNIFIED_ASM)
19447 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19448 else
19449 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19452 else
19453 if (TARGET_UNIFIED_ASM)
19454 sprintf (instr, "pop%s\t{", conditional);
19455 else
19456 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19458 p = instr + strlen (instr);
19460 for (reg = 0; reg <= SP_REGNUM; reg++)
19461 if (live_regs_mask & (1 << reg))
19463 int l = strlen (reg_names[reg]);
19465 if (first)
19466 first = 0;
19467 else
19469 memcpy (p, ", ", 2);
19470 p += 2;
19473 memcpy (p, "%|", 2);
19474 memcpy (p + 2, reg_names[reg], l);
19475 p += l + 2;
19478 if (live_regs_mask & (1 << LR_REGNUM))
19480 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19481 /* If returning from an interrupt, restore the CPSR. */
19482 if (IS_INTERRUPT (func_type))
19483 strcat (p, "^");
19485 else
19486 strcpy (p, "}");
19489 output_asm_insn (instr, & operand);
19491 /* See if we need to generate an extra instruction to
19492 perform the actual function return. */
19493 if (really_return
19494 && func_type != ARM_FT_INTERWORKED
19495 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19497 /* The return has already been handled
19498 by loading the LR into the PC. */
19499 return "";
19503 if (really_return)
19505 switch ((int) ARM_FUNC_TYPE (func_type))
19507 case ARM_FT_ISR:
19508 case ARM_FT_FIQ:
19509 /* ??? This is wrong for unified assembly syntax. */
19510 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19511 break;
19513 case ARM_FT_INTERWORKED:
19514 sprintf (instr, "bx%s\t%%|lr", conditional);
19515 break;
19517 case ARM_FT_EXCEPTION:
19518 /* ??? This is wrong for unified assembly syntax. */
19519 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19520 break;
19522 default:
19523 /* Use bx if it's available. */
19524 if (arm_arch5 || arm_arch4t)
19525 sprintf (instr, "bx%s\t%%|lr", conditional);
19526 else
19527 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19528 break;
19531 output_asm_insn (instr, & operand);
19534 return "";
19537 /* Write the function name into the code section, directly preceding
19538 the function prologue.
19540 Code will be output similar to this:
19542 .ascii "arm_poke_function_name", 0
19543 .align
19545 .word 0xff000000 + (t1 - t0)
19546 arm_poke_function_name
19547 mov ip, sp
19548 stmfd sp!, {fp, ip, lr, pc}
19549 sub fp, ip, #4
19551 When performing a stack backtrace, code can inspect the value
19552 of 'pc' stored at 'fp' + 0. If the trace function then looks
19553 at location pc - 12 and the top 8 bits are set, then we know
19554 that there is a function name embedded immediately preceding this
19555 location, whose length is ((pc[-3]) & ~0xff000000).
19557 We assume that pc is declared as a pointer to an unsigned long.
19559 It is of no benefit to output the function name if we are assembling
19560 a leaf function. These function types will not contain a stack
19561 backtrace structure; therefore, it is not possible to determine the
19562 function name. */
19563 void
19564 arm_poke_function_name (FILE *stream, const char *name)
19566 unsigned long alignlength;
19567 unsigned long length;
19568 rtx x;
19570 length = strlen (name) + 1;
19571 alignlength = ROUND_UP_WORD (length);
19573 ASM_OUTPUT_ASCII (stream, name, length);
19574 ASM_OUTPUT_ALIGN (stream, 2);
19575 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19576 assemble_aligned_integer (UNITS_PER_WORD, x);
19579 /* Place some comments into the assembler stream
19580 describing the current function. */
19581 static void
19582 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19584 unsigned long func_type;
19586 /* ??? Do we want to print some of the below anyway? */
19587 if (TARGET_THUMB1)
19588 return;
19590 /* Sanity check. */
19591 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19593 func_type = arm_current_func_type ();
19595 switch ((int) ARM_FUNC_TYPE (func_type))
19597 default:
19598 case ARM_FT_NORMAL:
19599 break;
19600 case ARM_FT_INTERWORKED:
19601 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19602 break;
19603 case ARM_FT_ISR:
19604 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19605 break;
19606 case ARM_FT_FIQ:
19607 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19608 break;
19609 case ARM_FT_EXCEPTION:
19610 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19611 break;
19614 if (IS_NAKED (func_type))
19615 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19617 if (IS_VOLATILE (func_type))
19618 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19620 if (IS_NESTED (func_type))
19621 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19622 if (IS_STACKALIGN (func_type))
19623 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19625 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19626 crtl->args.size,
19627 crtl->args.pretend_args_size, frame_size);
19629 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19630 frame_pointer_needed,
19631 cfun->machine->uses_anonymous_args);
19633 if (cfun->machine->lr_save_eliminated)
19634 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19636 if (crtl->calls_eh_return)
19637 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19641 static void
19642 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19643 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19645 arm_stack_offsets *offsets;
19647 if (TARGET_THUMB1)
19649 int regno;
19651 /* Emit any call-via-reg trampolines that are needed for v4t support
19652 of call_reg and call_value_reg type insns. */
19653 for (regno = 0; regno < LR_REGNUM; regno++)
19655 rtx label = cfun->machine->call_via[regno];
19657 if (label != NULL)
19659 switch_to_section (function_section (current_function_decl));
19660 targetm.asm_out.internal_label (asm_out_file, "L",
19661 CODE_LABEL_NUMBER (label));
19662 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19666 /* ??? Probably not safe to set this here, since it assumes that a
19667 function will be emitted as assembly immediately after we generate
19668 RTL for it. This does not happen for inline functions. */
19669 cfun->machine->return_used_this_function = 0;
19671 else /* TARGET_32BIT */
19673 /* We need to take into account any stack-frame rounding. */
19674 offsets = arm_get_frame_offsets ();
19676 gcc_assert (!use_return_insn (FALSE, NULL)
19677 || (cfun->machine->return_used_this_function != 0)
19678 || offsets->saved_regs == offsets->outgoing_args
19679 || frame_pointer_needed);
19683 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19684 STR and STRD. If an even number of registers is being pushed, an
19685 STRD pattern is created for each register pair. If an
19686 odd number of registers is pushed, emit an initial STR followed by
19687 as many STRD instructions as are needed. This works best when the
19688 stack is initially 64-bit aligned (the normal case), since it
19689 ensures that each STRD is also 64-bit aligned. */
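/* As an illustration (a sketch of the intended effect, not the literal
   RTL): with SAVED_REGS_MASK covering {r4, r5, r6, r7, lr}, i.e. five
   registers, the expansion corresponds to

        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, lr, [sp, #12]

   one STR with writeback for the odd register, then STRDs at fixed
   offsets, each of which is 64-bit aligned provided SP was 64-bit
   aligned on entry.  */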
19690 static void
19691 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19693 int num_regs = 0;
19694 int i;
19695 int regno;
19696 rtx par = NULL_RTX;
19697 rtx dwarf = NULL_RTX;
19698 rtx tmp;
19699 bool first = true;
19701 num_regs = bit_count (saved_regs_mask);
19703 /* Must be at least one register to save, and can't save SP or PC. */
19704 gcc_assert (num_regs > 0 && num_regs <= 14);
19705 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19706 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19708 /* Create sequence for DWARF info. All the frame-related data for
19709 debugging is held in this wrapper. */
19710 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19712 /* Describe the stack adjustment. */
19713 tmp = gen_rtx_SET (VOIDmode,
19714 stack_pointer_rtx,
19715 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19716 RTX_FRAME_RELATED_P (tmp) = 1;
19717 XVECEXP (dwarf, 0, 0) = tmp;
19719 /* Find the first register. */
19720 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19723 i = 0;
19725 /* If there's an odd number of registers to push, start off by
19726 pushing a single register. This ensures that subsequent strd
19727 operations are dword aligned (assuming that SP was originally
19728 64-bit aligned). */
19729 if ((num_regs & 1) != 0)
19731 rtx reg, mem, insn;
19733 reg = gen_rtx_REG (SImode, regno);
19734 if (num_regs == 1)
19735 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19736 stack_pointer_rtx));
19737 else
19738 mem = gen_frame_mem (Pmode,
19739 gen_rtx_PRE_MODIFY
19740 (Pmode, stack_pointer_rtx,
19741 plus_constant (Pmode, stack_pointer_rtx,
19742 -4 * num_regs)));
19744 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19745 RTX_FRAME_RELATED_P (tmp) = 1;
19746 insn = emit_insn (tmp);
19747 RTX_FRAME_RELATED_P (insn) = 1;
19748 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19749 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19750 reg);
19751 RTX_FRAME_RELATED_P (tmp) = 1;
19752 i++;
19753 regno++;
19754 XVECEXP (dwarf, 0, i) = tmp;
19755 first = false;
19758 while (i < num_regs)
19759 if (saved_regs_mask & (1 << regno))
19761 rtx reg1, reg2, mem1, mem2;
19762 rtx tmp0, tmp1, tmp2;
19763 int regno2;
19765 /* Find the register to pair with this one. */
19766 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19767 regno2++)
19770 reg1 = gen_rtx_REG (SImode, regno);
19771 reg2 = gen_rtx_REG (SImode, regno2);
19773 if (first)
19775 rtx insn;
19777 first = false;
19778 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19779 stack_pointer_rtx,
19780 -4 * num_regs));
19781 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19782 stack_pointer_rtx,
19783 -4 * (num_regs - 1)));
19784 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19785 plus_constant (Pmode, stack_pointer_rtx,
19786 -4 * (num_regs)));
19787 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19788 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19789 RTX_FRAME_RELATED_P (tmp0) = 1;
19790 RTX_FRAME_RELATED_P (tmp1) = 1;
19791 RTX_FRAME_RELATED_P (tmp2) = 1;
19792 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19793 XVECEXP (par, 0, 0) = tmp0;
19794 XVECEXP (par, 0, 1) = tmp1;
19795 XVECEXP (par, 0, 2) = tmp2;
19796 insn = emit_insn (par);
19797 RTX_FRAME_RELATED_P (insn) = 1;
19798 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19800 else
19802 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19803 stack_pointer_rtx,
19804 4 * i));
19805 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19806 stack_pointer_rtx,
19807 4 * (i + 1)));
19808 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19809 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19810 RTX_FRAME_RELATED_P (tmp1) = 1;
19811 RTX_FRAME_RELATED_P (tmp2) = 1;
19812 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19813 XVECEXP (par, 0, 0) = tmp1;
19814 XVECEXP (par, 0, 1) = tmp2;
19815 emit_insn (par);
19818 /* Create unwind information. This is an approximation. */
19819 tmp1 = gen_rtx_SET (VOIDmode,
19820 gen_frame_mem (Pmode,
19821 plus_constant (Pmode,
19822 stack_pointer_rtx,
19823 4 * i)),
19824 reg1);
19825 tmp2 = gen_rtx_SET (VOIDmode,
19826 gen_frame_mem (Pmode,
19827 plus_constant (Pmode,
19828 stack_pointer_rtx,
19829 4 * (i + 1))),
19830 reg2);
19832 RTX_FRAME_RELATED_P (tmp1) = 1;
19833 RTX_FRAME_RELATED_P (tmp2) = 1;
19834 XVECEXP (dwarf, 0, i + 1) = tmp1;
19835 XVECEXP (dwarf, 0, i + 2) = tmp2;
19836 i += 2;
19837 regno = regno2 + 1;
19839 else
19840 regno++;
19842 return;
19845 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19846 whenever possible, otherwise it emits single-word stores. The first store
19847 also allocates stack space for all saved registers, using writeback with
19848 pre-indexed addressing with writeback. All other stores use offset addressing. If no STRD
19849 can be emitted, this function emits a sequence of single-word stores,
19850 and not an STM as before, because single-word stores provide more freedom for
19851 scheduling and can be turned into an STM by peephole optimizations. */
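/* As an illustration (a sketch, not the literal RTL): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7, lr} the stores emitted here
   are roughly equivalent to

        strd    r4, r5, [sp, #-20]!
        strd    r6, r7, [sp, #8]
        str     lr, [sp, #16]

   The first store allocates the whole block with writeback; lr has no
   consecutive partner, so it falls back to a single STR.  */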
19852 static void
19853 arm_emit_strd_push (unsigned long saved_regs_mask)
19855 int num_regs = 0;
19856 int i, j, dwarf_index = 0;
19857 int offset = 0;
19858 rtx dwarf = NULL_RTX;
19859 rtx insn = NULL_RTX;
19860 rtx tmp, mem;
19862 /* TODO: More efficient code can be emitted by changing the
19863 layout, e.g., first push all pairs that can use STRD to keep the
19864 stack aligned, and then push all other registers. */
19865 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19866 if (saved_regs_mask & (1 << i))
19867 num_regs++;
19869 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19870 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19871 gcc_assert (num_regs > 0);
19873 /* Create sequence for DWARF info. */
19874 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19876 /* For dwarf info, we generate an explicit stack update. */
19877 tmp = gen_rtx_SET (VOIDmode,
19878 stack_pointer_rtx,
19879 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19880 RTX_FRAME_RELATED_P (tmp) = 1;
19881 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19883 /* Save registers. */
19884 offset = - 4 * num_regs;
19885 j = 0;
19886 while (j <= LAST_ARM_REGNUM)
19887 if (saved_regs_mask & (1 << j))
19889 if ((j % 2 == 0)
19890 && (saved_regs_mask & (1 << (j + 1))))
19892 /* The current register and the next register form a register pair for
19893 which STRD can be generated. */
19894 if (offset < 0)
19896 /* Allocate stack space for all saved registers. */
19897 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19898 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19899 mem = gen_frame_mem (DImode, tmp);
19900 offset = 0;
19902 else if (offset > 0)
19903 mem = gen_frame_mem (DImode,
19904 plus_constant (Pmode,
19905 stack_pointer_rtx,
19906 offset));
19907 else
19908 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19910 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 tmp = emit_insn (tmp);
19914 /* Record the first store insn. */
19915 if (dwarf_index == 1)
19916 insn = tmp;
19918 /* Generate dwarf info. */
19919 mem = gen_frame_mem (SImode,
19920 plus_constant (Pmode,
19921 stack_pointer_rtx,
19922 offset));
19923 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19924 RTX_FRAME_RELATED_P (tmp) = 1;
19925 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19927 mem = gen_frame_mem (SImode,
19928 plus_constant (Pmode,
19929 stack_pointer_rtx,
19930 offset + 4));
19931 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19932 RTX_FRAME_RELATED_P (tmp) = 1;
19933 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19935 offset += 8;
19936 j += 2;
19938 else
19940 /* Emit a single word store. */
19941 if (offset < 0)
19943 /* Allocate stack space for all saved registers. */
19944 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19945 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19946 mem = gen_frame_mem (SImode, tmp);
19947 offset = 0;
19949 else if (offset > 0)
19950 mem = gen_frame_mem (SImode,
19951 plus_constant (Pmode,
19952 stack_pointer_rtx,
19953 offset));
19954 else
19955 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19957 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19958 RTX_FRAME_RELATED_P (tmp) = 1;
19959 tmp = emit_insn (tmp);
19961 /* Record the first store insn. */
19962 if (dwarf_index == 1)
19963 insn = tmp;
19965 /* Generate dwarf info. */
19966 mem = gen_frame_mem (SImode,
19967 plus_constant(Pmode,
19968 stack_pointer_rtx,
19969 offset));
19970 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19971 RTX_FRAME_RELATED_P (tmp) = 1;
19972 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19974 offset += 4;
19975 j += 1;
19978 else
19979 j++;
19981 /* Attach dwarf info to the first insn we generate. */
19982 gcc_assert (insn != NULL_RTX);
19983 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19984 RTX_FRAME_RELATED_P (insn) = 1;
19987 /* Generate and emit an insn that we will recognize as a push_multi.
19988 Unfortunately, since this insn does not reflect very well the actual
19989 semantics of the operation, we need to annotate the insn for the benefit
19990 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19991 MASK for registers that should be annotated for DWARF2 frame unwind
19992 information. */
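/* As an illustration only: for MASK = {r4, r5, lr} the insn built here
   is ultimately printed as something like

        push    {r4, r5, lr}       @ or stmfd sp!, {r4, r5, lr}

   while the attached note describes it to the unwinder as one SP -= 12
   adjustment plus three individual stores, as laid out in the comment
   inside the function below.  */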
19993 static rtx
19994 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19996 int num_regs = 0;
19997 int num_dwarf_regs = 0;
19998 int i, j;
19999 rtx par;
20000 rtx dwarf;
20001 int dwarf_par_index;
20002 rtx tmp, reg;
20004 /* We don't record the PC in the dwarf frame information. */
20005 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20007 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20009 if (mask & (1 << i))
20010 num_regs++;
20011 if (dwarf_regs_mask & (1 << i))
20012 num_dwarf_regs++;
20015 gcc_assert (num_regs && num_regs <= 16);
20016 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20018 /* For the body of the insn we are going to generate an UNSPEC in
20019 parallel with several USEs. This allows the insn to be recognized
20020 by the push_multi pattern in the arm.md file.
20022 The body of the insn looks something like this:
20024 (parallel [
20025 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20026 (const_int:SI <num>)))
20027 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20028 (use (reg:SI XX))
20029 (use (reg:SI YY))
20033 For the frame note however, we try to be more explicit and actually
20034 show each register being stored into the stack frame, plus a (single)
20035 decrement of the stack pointer. We do it this way in order to be
20036 friendly to the stack unwinding code, which only wants to see a single
20037 stack decrement per instruction. The RTL we generate for the note looks
20038 something like this:
20040 (sequence [
20041 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20042 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20043 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20044 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20048 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20049 instead we'd have a parallel expression detailing all
20050 the stores to the various memory addresses so that debug
20051 information is more up-to-date. Remember however while writing
20052 this to take care of the constraints with the push instruction.
20054 Note also that this has to be taken care of for the VFP registers.
20056 For more see PR43399. */
20058 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20059 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20060 dwarf_par_index = 1;
20062 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20064 if (mask & (1 << i))
20066 reg = gen_rtx_REG (SImode, i);
20068 XVECEXP (par, 0, 0)
20069 = gen_rtx_SET (VOIDmode,
20070 gen_frame_mem
20071 (BLKmode,
20072 gen_rtx_PRE_MODIFY (Pmode,
20073 stack_pointer_rtx,
20074 plus_constant
20075 (Pmode, stack_pointer_rtx,
20076 -4 * num_regs))
20078 gen_rtx_UNSPEC (BLKmode,
20079 gen_rtvec (1, reg),
20080 UNSPEC_PUSH_MULT));
20082 if (dwarf_regs_mask & (1 << i))
20084 tmp = gen_rtx_SET (VOIDmode,
20085 gen_frame_mem (SImode, stack_pointer_rtx),
20086 reg);
20087 RTX_FRAME_RELATED_P (tmp) = 1;
20088 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20091 break;
20095 for (j = 1, i++; j < num_regs; i++)
20097 if (mask & (1 << i))
20099 reg = gen_rtx_REG (SImode, i);
20101 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20103 if (dwarf_regs_mask & (1 << i))
20106 = gen_rtx_SET (VOIDmode,
20107 gen_frame_mem
20108 (SImode,
20109 plus_constant (Pmode, stack_pointer_rtx,
20110 4 * j)),
20111 reg);
20112 RTX_FRAME_RELATED_P (tmp) = 1;
20113 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20116 j++;
20120 par = emit_insn (par);
20122 tmp = gen_rtx_SET (VOIDmode,
20123 stack_pointer_rtx,
20124 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20125 RTX_FRAME_RELATED_P (tmp) = 1;
20126 XVECEXP (dwarf, 0, 0) = tmp;
20128 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20130 return par;
20133 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20134 SIZE is the offset to be adjusted.
20135 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20136 static void
20137 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20139 rtx dwarf;
20141 RTX_FRAME_RELATED_P (insn) = 1;
20142 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20143 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20146 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20147 SAVED_REGS_MASK shows which registers need to be restored.
20149 Unfortunately, since this insn does not reflect very well the actual
20150 semantics of the operation, we need to annotate the insn for the benefit
20151 of DWARF2 frame unwind information. */
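/* As an illustration only: for SAVED_REGS_MASK = {r4, r5, r6, pc} the
   pattern built here is ultimately printed as something like

        pop     {r4, r5, r6, pc}   @ or ldmfd sp!, {r4, r5, r6, pc}

   a single load-multiple that restores the registers and, because PC is
   in the list, also performs the function return.  */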
20152 static void
20153 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20155 int num_regs = 0;
20156 int i, j;
20157 rtx par;
20158 rtx dwarf = NULL_RTX;
20159 rtx tmp, reg;
20160 bool return_in_pc;
20161 int offset_adj;
20162 int emit_update;
20164 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20165 offset_adj = return_in_pc ? 1 : 0;
20166 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20167 if (saved_regs_mask & (1 << i))
20168 num_regs++;
20170 gcc_assert (num_regs && num_regs <= 16);
20172 /* If SP is in reglist, then we don't emit SP update insn. */
20173 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20175 /* The parallel needs to hold num_regs SETs
20176 and one SET for the stack update. */
20177 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20179 if (return_in_pc)
20181 tmp = ret_rtx;
20182 XVECEXP (par, 0, 0) = tmp;
20185 if (emit_update)
20187 /* Increment the stack pointer, based on there being
20188 num_regs 4-byte registers to restore. */
20189 tmp = gen_rtx_SET (VOIDmode,
20190 stack_pointer_rtx,
20191 plus_constant (Pmode,
20192 stack_pointer_rtx,
20193 4 * num_regs));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 XVECEXP (par, 0, offset_adj) = tmp;
20198 /* Now restore every reg, which may include PC. */
20199 for (j = 0, i = 0; j < num_regs; i++)
20200 if (saved_regs_mask & (1 << i))
20202 reg = gen_rtx_REG (SImode, i);
20203 if ((num_regs == 1) && emit_update && !return_in_pc)
20205 /* Emit single load with writeback. */
20206 tmp = gen_frame_mem (SImode,
20207 gen_rtx_POST_INC (Pmode,
20208 stack_pointer_rtx));
20209 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20210 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20211 return;
20214 tmp = gen_rtx_SET (VOIDmode,
20215 reg,
20216 gen_frame_mem
20217 (SImode,
20218 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20219 RTX_FRAME_RELATED_P (tmp) = 1;
20220 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20222 /* We need to maintain a sequence for DWARF info too. As dwarf info
20223 should not have PC, skip PC. */
20224 if (i != PC_REGNUM)
20225 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20227 j++;
20230 if (return_in_pc)
20231 par = emit_jump_insn (par);
20232 else
20233 par = emit_insn (par);
20235 REG_NOTES (par) = dwarf;
20236 if (!return_in_pc)
20237 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20238 stack_pointer_rtx, stack_pointer_rtx);
20241 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20242 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20244 Unfortunately, since this insn does not reflect very well the actual
20245 semantics of the operation, we need to annotate the insn for the benefit
20246 of DWARF2 frame unwind information. */
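/* As an illustration only: restoring d8-d11 from the stack pointer
   corresponds to something like

        vldm    sp!, {d8-d11}

   with a REG_CFA_RESTORE note per D register, plus either a CFA
   adjustment of 8 * num_regs bytes or, when the base register is IP,
   a REG_CFA_DEF_CFA note (see below).  */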
20247 static void
20248 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20250 int i, j;
20251 rtx par;
20252 rtx dwarf = NULL_RTX;
20253 rtx tmp, reg;
20255 gcc_assert (num_regs && num_regs <= 32);
20257 /* Workaround ARM10 VFPr1 bug. */
20258 if (num_regs == 2 && !arm_arch6)
20260 if (first_reg == 15)
20261 first_reg--;
20263 num_regs++;
20266 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20267 there could be up to 32 D-registers to restore.
20268 If there are more than 16 D-registers, make two recursive calls,
20269 each of which emits one pop_multi instruction. */
20270 if (num_regs > 16)
20272 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20273 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20274 return;
20277 /* The parallel needs to hold num_regs SETs
20278 and one SET for the stack update. */
20279 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20281 /* Increment the stack pointer, based on there being
20282 num_regs 8-byte registers to restore. */
20283 tmp = gen_rtx_SET (VOIDmode,
20284 base_reg,
20285 plus_constant (Pmode, base_reg, 8 * num_regs));
20286 RTX_FRAME_RELATED_P (tmp) = 1;
20287 XVECEXP (par, 0, 0) = tmp;
20289 /* Now show every reg that will be restored, using a SET for each. */
20290 for (j = 0, i=first_reg; j < num_regs; i += 2)
20292 reg = gen_rtx_REG (DFmode, i);
20294 tmp = gen_rtx_SET (VOIDmode,
20295 reg,
20296 gen_frame_mem
20297 (DFmode,
20298 plus_constant (Pmode, base_reg, 8 * j)));
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 XVECEXP (par, 0, j + 1) = tmp;
20302 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20304 j++;
20307 par = emit_insn (par);
20308 REG_NOTES (par) = dwarf;
20310 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20311 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20313 RTX_FRAME_RELATED_P (par) = 1;
20314 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20316 else
20317 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20318 base_reg, base_reg);
20321 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20322 even number of registers is being popped, multiple LDRD patterns are created
20323 for all register pairs. If an odd number of registers is popped, the last
20324 register is loaded using an LDR pattern. */
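/* As an illustration only: with SAVED_REGS_MASK = {r4, r5, r6, r7, pc}
   the emitted sequence corresponds roughly to

        ldrd    r4, r5, [sp]
        ldrd    r6, r7, [sp, #8]
        add     sp, sp, #16
        ldr     pc, [sp], #4

   LDRD pairs at fixed offsets, one stack adjustment, and a final
   post-incrementing LDR that also returns because it loads PC.  */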
20325 static void
20326 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20328 int num_regs = 0;
20329 int i, j;
20330 rtx par = NULL_RTX;
20331 rtx dwarf = NULL_RTX;
20332 rtx tmp, reg, tmp1;
20333 bool return_in_pc;
20335 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20336 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20337 if (saved_regs_mask & (1 << i))
20338 num_regs++;
20340 gcc_assert (num_regs && num_regs <= 16);
20342 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20343 to be popped. So, if num_regs is even, now it will become odd,
20344 and we can generate pop with PC. If num_regs is odd, it will be
20345 even now, and ldr with return can be generated for PC. */
20346 if (return_in_pc)
20347 num_regs--;
20349 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20351 /* Var j iterates over all the registers to gather all the registers in
20352 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
20353 A PARALLEL RTX of register-pair is created here, so that pattern for
20354 LDRD can be matched. As PC is always last register to be popped, and
20355 we have already decremented num_regs if PC, we don't have to worry
20356 about PC in this loop. */
20357 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20358 if (saved_regs_mask & (1 << j))
20360 /* Create RTX for memory load. */
20361 reg = gen_rtx_REG (SImode, j);
20362 tmp = gen_rtx_SET (SImode,
20363 reg,
20364 gen_frame_mem (SImode,
20365 plus_constant (Pmode,
20366 stack_pointer_rtx, 4 * i)));
20367 RTX_FRAME_RELATED_P (tmp) = 1;
20369 if (i % 2 == 0)
20371 /* When saved-register index (i) is even, the RTX to be emitted is
20372 yet to be created. Hence create it first. The LDRD pattern we
20373 are generating is :
20374 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20375 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20376 where target registers need not be consecutive. */
20377 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20378 dwarf = NULL_RTX;
20381 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20382 added as 0th element and if i is odd, reg_i is added as 1st element
20383 of LDRD pattern shown above. */
20384 XVECEXP (par, 0, (i % 2)) = tmp;
20385 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20387 if ((i % 2) == 1)
20389 /* When saved-register index (i) is odd, RTXs for both the registers
20390 to be loaded are generated in above given LDRD pattern, and the
20391 pattern can be emitted now. */
20392 par = emit_insn (par);
20393 REG_NOTES (par) = dwarf;
20394 RTX_FRAME_RELATED_P (par) = 1;
20397 i++;
20400 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
20401 number of registers is even AND return_in_pc is true, the last register is
20402 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20403 then LDR with post increment. */
20405 /* Increment the stack pointer, based on there being
20406 num_regs 4-byte registers to restore. */
20407 tmp = gen_rtx_SET (VOIDmode,
20408 stack_pointer_rtx,
20409 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20410 RTX_FRAME_RELATED_P (tmp) = 1;
20411 tmp = emit_insn (tmp);
20412 if (!return_in_pc)
20414 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20415 stack_pointer_rtx, stack_pointer_rtx);
20418 dwarf = NULL_RTX;
20420 if (((num_regs % 2) == 1 && !return_in_pc)
20421 || ((num_regs % 2) == 0 && return_in_pc))
20423 /* Scan for the single register to be popped. Skip until the saved
20424 register is found. */
20425 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20427 /* Gen LDR with post increment here. */
20428 tmp1 = gen_rtx_MEM (SImode,
20429 gen_rtx_POST_INC (SImode,
20430 stack_pointer_rtx));
20431 set_mem_alias_set (tmp1, get_frame_alias_set ());
20433 reg = gen_rtx_REG (SImode, j);
20434 tmp = gen_rtx_SET (SImode, reg, tmp1);
20435 RTX_FRAME_RELATED_P (tmp) = 1;
20436 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20438 if (return_in_pc)
20440 /* If return_in_pc, j must be PC_REGNUM. */
20441 gcc_assert (j == PC_REGNUM);
20442 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20443 XVECEXP (par, 0, 0) = ret_rtx;
20444 XVECEXP (par, 0, 1) = tmp;
20445 par = emit_jump_insn (par);
20447 else
20449 par = emit_insn (tmp);
20450 REG_NOTES (par) = dwarf;
20451 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20452 stack_pointer_rtx, stack_pointer_rtx);
20456 else if ((num_regs % 2) == 1 && return_in_pc)
20458 /* There are 2 registers to be popped. So, generate the pattern
20459 pop_multiple_with_stack_update_and_return to pop in PC. */
20460 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20463 return;
20466 /* LDRD in ARM mode needs consecutive registers as operands. This function
20467 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20468 offset addressing and then generates one separate stack update. This provides
20469 more scheduling freedom, compared to writeback on every load. However,
20470 if the function returns using load into PC directly
20471 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20472 before the last load. TODO: Add a peephole optimization to recognize
20473 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20474 peephole optimization to merge the load at stack-offset zero
20475 with the stack update instruction using load with writeback
20476 in post-index addressing mode. */
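/* As an illustration only: with SAVED_REGS_MASK = {r4, r5, r6, lr} the
   emitted sequence corresponds roughly to

        ldrd    r4, r5, [sp]
        ldr     r6, [sp, #8]
        ldr     lr, [sp, #12]
        add     sp, sp, #16

   LDRD for the consecutive even/odd pair, single-word loads for the
   registers without a partner, and one separate stack update.  */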
20477 static void
20478 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20480 int j = 0;
20481 int offset = 0;
20482 rtx par = NULL_RTX;
20483 rtx dwarf = NULL_RTX;
20484 rtx tmp, mem;
20486 /* Restore saved registers. */
20487 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20488 j = 0;
20489 while (j <= LAST_ARM_REGNUM)
20490 if (saved_regs_mask & (1 << j))
20492 if ((j % 2) == 0
20493 && (saved_regs_mask & (1 << (j + 1)))
20494 && (j + 1) != PC_REGNUM)
20496 /* The current register and the next register form a register pair for which
20497 LDRD can be generated. PC is always the last register popped, and
20498 we handle it separately. */
20499 if (offset > 0)
20500 mem = gen_frame_mem (DImode,
20501 plus_constant (Pmode,
20502 stack_pointer_rtx,
20503 offset));
20504 else
20505 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20507 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20508 tmp = emit_insn (tmp);
20509 RTX_FRAME_RELATED_P (tmp) = 1;
20511 /* Generate dwarf info. */
20513 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20514 gen_rtx_REG (SImode, j),
20515 NULL_RTX);
20516 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20517 gen_rtx_REG (SImode, j + 1),
20518 dwarf);
20520 REG_NOTES (tmp) = dwarf;
20522 offset += 8;
20523 j += 2;
20525 else if (j != PC_REGNUM)
20527 /* Emit a single word load. */
20528 if (offset > 0)
20529 mem = gen_frame_mem (SImode,
20530 plus_constant (Pmode,
20531 stack_pointer_rtx,
20532 offset));
20533 else
20534 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20536 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20537 tmp = emit_insn (tmp);
20538 RTX_FRAME_RELATED_P (tmp) = 1;
20540 /* Generate dwarf info. */
20541 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20542 gen_rtx_REG (SImode, j),
20543 NULL_RTX);
20545 offset += 4;
20546 j += 1;
20548 else /* j == PC_REGNUM */
20549 j++;
20551 else
20552 j++;
20554 /* Update the stack. */
20555 if (offset > 0)
20557 tmp = gen_rtx_SET (Pmode,
20558 stack_pointer_rtx,
20559 plus_constant (Pmode,
20560 stack_pointer_rtx,
20561 offset));
20562 tmp = emit_insn (tmp);
20563 arm_add_cfa_adjust_cfa_note (tmp, offset,
20564 stack_pointer_rtx, stack_pointer_rtx);
20565 offset = 0;
20568 if (saved_regs_mask & (1 << PC_REGNUM))
20570 /* Only PC is to be popped. */
20571 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20572 XVECEXP (par, 0, 0) = ret_rtx;
20573 tmp = gen_rtx_SET (SImode,
20574 gen_rtx_REG (SImode, PC_REGNUM),
20575 gen_frame_mem (SImode,
20576 gen_rtx_POST_INC (SImode,
20577 stack_pointer_rtx)));
20578 RTX_FRAME_RELATED_P (tmp) = 1;
20579 XVECEXP (par, 0, 1) = tmp;
20580 par = emit_jump_insn (par);
20582 /* Generate dwarf info. */
20583 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20584 gen_rtx_REG (SImode, PC_REGNUM),
20585 NULL_RTX);
20586 REG_NOTES (par) = dwarf;
20587 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20588 stack_pointer_rtx, stack_pointer_rtx);
20592 /* Calculate the size of the return value that is passed in registers. */
20593 static unsigned
20594 arm_size_return_regs (void)
20596 machine_mode mode;
20598 if (crtl->return_rtx != 0)
20599 mode = GET_MODE (crtl->return_rtx);
20600 else
20601 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20603 return GET_MODE_SIZE (mode);
20606 /* Return true if the current function needs to save/restore LR. */
20607 static bool
20608 thumb_force_lr_save (void)
20610 return !cfun->machine->lr_save_eliminated
20611 && (!leaf_function_p ()
20612 || thumb_far_jump_used_p ()
20613 || df_regs_ever_live_p (LR_REGNUM));
20616 /* We do not know whether r3 will be available, because
20617 there is an indirect tailcall happening in this
20618 particular case. */
20619 static bool
20620 is_indirect_tailcall_p (rtx call)
20622 rtx pat = PATTERN (call);
20624 /* Indirect tail call. */
20625 pat = XVECEXP (pat, 0, 0);
20626 if (GET_CODE (pat) == SET)
20627 pat = SET_SRC (pat);
20629 pat = XEXP (XEXP (pat, 0), 0);
20630 return REG_P (pat);
20633 /* Return true if r3 is used by any of the tail call insns in the
20634 current function. */
20635 static bool
20636 any_sibcall_could_use_r3 (void)
20638 edge_iterator ei;
20639 edge e;
20641 if (!crtl->tail_call_emit)
20642 return false;
20643 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20644 if (e->flags & EDGE_SIBCALL)
20646 rtx call = BB_END (e->src);
20647 if (!CALL_P (call))
20648 call = prev_nonnote_nondebug_insn (call);
20649 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20650 if (find_regno_fusage (call, USE, 3)
20651 || is_indirect_tailcall_p (call))
20652 return true;
20654 return false;
20658 /* Compute the distance from register FROM to register TO.
20659 These can be the arg pointer (26), the soft frame pointer (25),
20660 the stack pointer (13) or the hard frame pointer (11).
20661 In thumb mode r7 is used as the soft frame pointer, if needed.
20662 Typical stack layout looks like this:
20664 old stack pointer -> | |
20665 ----
20666 | | \
20667 | | saved arguments for
20668 | | vararg functions
20669 | | /
20671 hard FP & arg pointer -> | | \
20672 | | stack
20673 | | frame
20674 | | /
20676 | | \
20677 | | call saved
20678 | | registers
20679 soft frame pointer -> | | /
20681 | | \
20682 | | local
20683 | | variables
20684 locals base pointer -> | | /
20686 | | \
20687 | | outgoing
20688 | | arguments
20689 current stack pointer -> | | /
20692 For a given function some or all of these stack components
20693 may not be needed, giving rise to the possibility of
20694 eliminating some of the registers.
20696 The values returned by this function must reflect the behavior
20697 of arm_expand_prologue() and arm_compute_save_reg_mask().
20699 The sign of the number returned reflects the direction of stack
20700 growth, so the values are positive for all eliminations except
20701 from the soft frame pointer to the hard frame pointer.
20703 SFP may point just inside the local variables block to ensure correct
20704 alignment. */
20707 /* Calculate stack offsets. These are used to calculate register elimination
20708 offsets and in prologue/epilogue code. Also calculates which registers
20709 should be saved. */
20711 static arm_stack_offsets *
20712 arm_get_frame_offsets (void)
20714 struct arm_stack_offsets *offsets;
20715 unsigned long func_type;
20716 int leaf;
20717 int saved;
20718 int core_saved;
20719 HOST_WIDE_INT frame_size;
20720 int i;
20722 offsets = &cfun->machine->stack_offsets;
20724 /* We need to know if we are a leaf function. Unfortunately, it
20725 is possible to be called after start_sequence has been called,
20726 which causes get_insns to return the insns for the sequence,
20727 not the function, which will cause leaf_function_p to return
20728 the incorrect result. Fortunately, we only really need
20730 to know about leaf functions once reload has completed, and the
20731 frame size cannot be changed after that time, so we can safely
20732 use the cached value. */
20734 if (reload_completed)
20735 return offsets;
20737 /* Initially this is the size of the local variables. It will be translated
20738 into an offset once we have determined the size of preceding data. */
20739 frame_size = ROUND_UP_WORD (get_frame_size ());
20741 leaf = leaf_function_p ();
20743 /* Space for variadic functions. */
20744 offsets->saved_args = crtl->args.pretend_args_size;
20746 /* In Thumb mode this is incorrect, but never used. */
20747 offsets->frame
20748 = (offsets->saved_args
20749 + arm_compute_static_chain_stack_bytes ()
20750 + (frame_pointer_needed ? 4 : 0));
20752 if (TARGET_32BIT)
20754 unsigned int regno;
20756 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20757 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20758 saved = core_saved;
20760 /* We know that SP will be doubleword aligned on entry, and we must
20761 preserve that condition at any subroutine call. We also require the
20762 soft frame pointer to be doubleword aligned. */
20764 if (TARGET_REALLY_IWMMXT)
20766 /* Check for the call-saved iWMMXt registers. */
20767 for (regno = FIRST_IWMMXT_REGNUM;
20768 regno <= LAST_IWMMXT_REGNUM;
20769 regno++)
20770 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20771 saved += 8;
20774 func_type = arm_current_func_type ();
20775 /* Space for saved VFP registers. */
20776 if (! IS_VOLATILE (func_type)
20777 && TARGET_HARD_FLOAT && TARGET_VFP)
20778 saved += arm_get_vfp_saved_size ();
20780 else /* TARGET_THUMB1 */
20782 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20783 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20784 saved = core_saved;
20785 if (TARGET_BACKTRACE)
20786 saved += 16;
20789 /* Saved registers include the stack frame. */
20790 offsets->saved_regs
20791 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20792 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20794 /* A leaf function does not need any stack alignment if it has nothing
20795 on the stack. */
20796 if (leaf && frame_size == 0
20797 /* However if it calls alloca(), we have a dynamically allocated
20798 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20799 && ! cfun->calls_alloca)
20801 offsets->outgoing_args = offsets->soft_frame;
20802 offsets->locals_base = offsets->soft_frame;
20803 return offsets;
20806 /* Ensure SFP has the correct alignment. */
20807 if (ARM_DOUBLEWORD_ALIGN
20808 && (offsets->soft_frame & 7))
20810 offsets->soft_frame += 4;
20811 /* Try to align stack by pushing an extra reg. Don't bother doing this
20812 when there is a stack frame as the alignment will be rolled into
20813 the normal stack adjustment. */
20814 if (frame_size + crtl->outgoing_args_size == 0)
20816 int reg = -1;
20818 /* Register r3 is caller-saved. Normally it does not need to be
20819 saved on entry by the prologue. However if we choose to save
20820 it for padding then we may confuse the compiler into thinking
20821 a prologue sequence is required when in fact it is not. This
20822 will occur when shrink-wrapping if r3 is used as a scratch
20823 register and there are no other callee-saved writes.
20825 This situation can be avoided when other callee-saved registers
20826 are available and r3 is not mandatory if we choose a callee-saved
20827 register for padding. */
20828 bool prefer_callee_reg_p = false;
20830 /* If it is safe to use r3, then do so. This sometimes
20831 generates better code on Thumb-2 by avoiding the need to
20832 use 32-bit push/pop instructions. */
20833 if (! any_sibcall_could_use_r3 ()
20834 && arm_size_return_regs () <= 12
20835 && (offsets->saved_regs_mask & (1 << 3)) == 0
20836 && (TARGET_THUMB2
20837 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20839 reg = 3;
20840 if (!TARGET_THUMB2)
20841 prefer_callee_reg_p = true;
20843 if (reg == -1
20844 || prefer_callee_reg_p)
20846 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20848 /* Avoid fixed registers; they may be changed at
20849 arbitrary times so it's unsafe to restore them
20850 during the epilogue. */
20851 if (!fixed_regs[i]
20852 && (offsets->saved_regs_mask & (1 << i)) == 0)
20854 reg = i;
20855 break;
20860 if (reg != -1)
20862 offsets->saved_regs += 4;
20863 offsets->saved_regs_mask |= (1 << reg);
20868 offsets->locals_base = offsets->soft_frame + frame_size;
20869 offsets->outgoing_args = (offsets->locals_base
20870 + crtl->outgoing_args_size);
20872 if (ARM_DOUBLEWORD_ALIGN)
20874 /* Ensure SP remains doubleword aligned. */
20875 if (offsets->outgoing_args & 7)
20876 offsets->outgoing_args += 4;
20877 gcc_assert (!(offsets->outgoing_args & 7));
20880 return offsets;
20884 /* Calculate the relative offsets for the different stack pointers. Positive
20885 offsets are in the direction of stack growth. */
20887 HOST_WIDE_INT
20888 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20890 arm_stack_offsets *offsets;
20892 offsets = arm_get_frame_offsets ();
20894 /* OK, now we have enough information to compute the distances.
20895 There must be an entry in these switch tables for each pair
20896 of registers in ELIMINABLE_REGS, even if some of the entries
20897 seem to be redundant or useless. */
20898 switch (from)
20900 case ARG_POINTER_REGNUM:
20901 switch (to)
20903 case THUMB_HARD_FRAME_POINTER_REGNUM:
20904 return 0;
20906 case FRAME_POINTER_REGNUM:
20907 /* This is the reverse of the soft frame pointer
20908 to hard frame pointer elimination below. */
20909 return offsets->soft_frame - offsets->saved_args;
20911 case ARM_HARD_FRAME_POINTER_REGNUM:
20912 /* This is only non-zero in the case where the static chain register
20913 is stored above the frame. */
20914 return offsets->frame - offsets->saved_args - 4;
20916 case STACK_POINTER_REGNUM:
20917 /* If nothing has been pushed on the stack at all
20918 then this will return -4. This *is* correct! */
20919 return offsets->outgoing_args - (offsets->saved_args + 4);
20921 default:
20922 gcc_unreachable ();
20924 gcc_unreachable ();
20926 case FRAME_POINTER_REGNUM:
20927 switch (to)
20929 case THUMB_HARD_FRAME_POINTER_REGNUM:
20930 return 0;
20932 case ARM_HARD_FRAME_POINTER_REGNUM:
20933 /* The hard frame pointer points to the top entry in the
20934 stack frame. The soft frame pointer to the bottom entry
20935 in the stack frame. If there is no stack frame at all,
20936 then they are identical. */
20938 return offsets->frame - offsets->soft_frame;
20940 case STACK_POINTER_REGNUM:
20941 return offsets->outgoing_args - offsets->soft_frame;
20943 default:
20944 gcc_unreachable ();
20946 gcc_unreachable ();
20948 default:
20949 /* You cannot eliminate from the stack pointer.
20950 In theory you could eliminate from the hard frame
20951 pointer to the stack pointer, but this will never
20952 happen, since if a stack frame is not needed the
20953 hard frame pointer will never be used. */
20954 gcc_unreachable ();
20958 /* Given FROM and TO register numbers, say whether this elimination is
20959 allowed. Frame pointer elimination is automatically handled.
20961 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20962 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20963 pointer, we must eliminate FRAME_POINTER_REGNUM into
20964 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20965 ARG_POINTER_REGNUM. */
20967 bool
20968 arm_can_eliminate (const int from, const int to)
20970 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20971 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20972 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20973 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20974 true);
20977 /* Emit RTL to save coprocessor registers on function entry. Returns the
20978 number of bytes pushed. */
20980 static int
20981 arm_save_coproc_regs(void)
20983 int saved_size = 0;
20984 unsigned reg;
20985 unsigned start_reg;
20986 rtx insn;
20988 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20989 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20991 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20992 insn = gen_rtx_MEM (V2SImode, insn);
20993 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20994 RTX_FRAME_RELATED_P (insn) = 1;
20995 saved_size += 8;
20998 if (TARGET_HARD_FLOAT && TARGET_VFP)
21000 start_reg = FIRST_VFP_REGNUM;
21002 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21004 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21005 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21007 if (start_reg != reg)
21008 saved_size += vfp_emit_fstmd (start_reg,
21009 (reg - start_reg) / 2);
21010 start_reg = reg + 2;
21013 if (start_reg != reg)
21014 saved_size += vfp_emit_fstmd (start_reg,
21015 (reg - start_reg) / 2);
21017 return saved_size;
21021 /* Set the Thumb frame pointer from the stack pointer. */
21023 static void
21024 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21026 HOST_WIDE_INT amount;
21027 rtx insn, dwarf;
21029 amount = offsets->outgoing_args - offsets->locals_base;
21030 if (amount < 1024)
21031 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21032 stack_pointer_rtx, GEN_INT (amount)));
21033 else
21035 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21036 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21037 expects the first two operands to be the same. */
21038 if (TARGET_THUMB2)
21040 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21041 stack_pointer_rtx,
21042 hard_frame_pointer_rtx));
21044 else
21046 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21047 hard_frame_pointer_rtx,
21048 stack_pointer_rtx));
21050 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21051 plus_constant (Pmode, stack_pointer_rtx, amount));
21052 RTX_FRAME_RELATED_P (dwarf) = 1;
21053 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21056 RTX_FRAME_RELATED_P (insn) = 1;
21059 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21060 function. */
21061 void
21062 arm_expand_prologue (void)
21064 rtx amount;
21065 rtx insn;
21066 rtx ip_rtx;
21067 unsigned long live_regs_mask;
21068 unsigned long func_type;
21069 int fp_offset = 0;
21070 int saved_pretend_args = 0;
21071 int saved_regs = 0;
21072 unsigned HOST_WIDE_INT args_to_push;
21073 arm_stack_offsets *offsets;
21075 func_type = arm_current_func_type ();
21077 /* Naked functions don't have prologues. */
21078 if (IS_NAKED (func_type))
21079 return;
21081 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21082 args_to_push = crtl->args.pretend_args_size;
21084 /* Compute which register we will have to save onto the stack. */
21085 offsets = arm_get_frame_offsets ();
21086 live_regs_mask = offsets->saved_regs_mask;
21088 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21090 if (IS_STACKALIGN (func_type))
21092 rtx r0, r1;
21094 /* Handle a word-aligned stack pointer. We generate the following:
21096 mov r0, sp
21097 bic r1, r0, #7
21098 mov sp, r1
21099 <save and restore r0 in normal prologue/epilogue>
21100 mov sp, r0
21101 bx lr
21103 The unwinder doesn't need to know about the stack realignment.
21104 Just tell it we saved SP in r0. */
21105 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21107 r0 = gen_rtx_REG (SImode, 0);
21108 r1 = gen_rtx_REG (SImode, 1);
21110 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21111 RTX_FRAME_RELATED_P (insn) = 1;
21112 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21114 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21116 /* ??? The CFA changes here, which may cause GDB to conclude that it
21117 has entered a different function. That said, the unwind info is
21118 correct, individually, before and after this instruction because
21119 we've described the save of SP, which will override the default
21120 handling of SP as restoring from the CFA. */
21121 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21124 /* For APCS frames, if the IP register is clobbered
21125 when creating the frame, save that register in a special
21126 way. */
21127 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21129 if (IS_INTERRUPT (func_type))
21131 /* Interrupt functions must not corrupt any registers.
21132 Creating a frame pointer however, corrupts the IP
21133 register, so we must push it first. */
21134 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21136 /* Do not set RTX_FRAME_RELATED_P on this insn.
21137 The dwarf stack unwinding code only wants to see one
21138 stack decrement per function, and this is not it. If
21139 this instruction is labeled as being part of the frame
21140 creation sequence then dwarf2out_frame_debug_expr will
21141 die when it encounters the assignment of IP to FP
21142 later on, since the use of SP here establishes SP as
21143 the CFA register and not IP.
21145 Anyway this instruction is not really part of the stack
21146 frame creation although it is part of the prologue. */
21148 else if (IS_NESTED (func_type))
21150 /* The static chain register is the same as the IP register
21151 used as a scratch register during stack frame creation.
21152 To get around this we need to find somewhere to store IP
21153 whilst the frame is being created. We try the following
21154 places in order:
21156 1. The last argument register r3 if it is available.
21157 2. A slot on the stack above the frame if there are no
21158 arguments to push onto the stack.
21159 3. Register r3 again, after pushing the argument registers
21160 onto the stack, if this is a varargs function.
21161 4. The last slot on the stack created for the arguments to
21162 push, if this isn't a varargs function.
21164 Note - we only need to tell the dwarf2 backend about the SP
21165 adjustment in the second variant; the static chain register
21166 doesn't need to be unwound, as it doesn't contain a value
21167 inherited from the caller. */
21169 if (!arm_r3_live_at_start_p ())
21170 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21171 else if (args_to_push == 0)
21173 rtx addr, dwarf;
21175 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21176 saved_regs += 4;
21178 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21179 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21180 fp_offset = 4;
21182 /* Just tell the dwarf backend that we adjusted SP. */
21183 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21184 plus_constant (Pmode, stack_pointer_rtx,
21185 -fp_offset));
21186 RTX_FRAME_RELATED_P (insn) = 1;
21187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21189 else
21191 /* Store the args on the stack. */
21192 if (cfun->machine->uses_anonymous_args)
21194 insn
21195 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21196 (0xf0 >> (args_to_push / 4)) & 0xf);
21197 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21198 saved_pretend_args = 1;
21200 else
21202 rtx addr, dwarf;
21204 if (args_to_push == 4)
21205 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21206 else
21207 addr
21208 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21209 plus_constant (Pmode,
21210 stack_pointer_rtx,
21211 -args_to_push));
21213 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21215 /* Just tell the dwarf backend that we adjusted SP. */
21216 dwarf
21217 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21218 plus_constant (Pmode, stack_pointer_rtx,
21219 -args_to_push));
21220 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21223 RTX_FRAME_RELATED_P (insn) = 1;
21224 fp_offset = args_to_push;
21225 args_to_push = 0;
21229 insn = emit_set_insn (ip_rtx,
21230 plus_constant (Pmode, stack_pointer_rtx,
21231 fp_offset));
21232 RTX_FRAME_RELATED_P (insn) = 1;
21235 if (args_to_push)
21237 /* Push the argument registers, or reserve space for them. */
21238 if (cfun->machine->uses_anonymous_args)
21239 insn = emit_multi_reg_push
21240 ((0xf0 >> (args_to_push / 4)) & 0xf,
21241 (0xf0 >> (args_to_push / 4)) & 0xf);
21242 else
21243 insn = emit_insn
21244 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21245 GEN_INT (- args_to_push)));
21246 RTX_FRAME_RELATED_P (insn) = 1;
21249 /* If this is an interrupt service routine, and the link register
21250 is going to be pushed, and we're not generating an extra
21251 push of IP (needed when a frame is needed and the frame layout is APCS),
21252 subtracting four from LR now will mean that the function return
21253 can be done with a single instruction. */
21254 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21255 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21256 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21257 && TARGET_ARM)
21259 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21261 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21264 if (live_regs_mask)
21266 unsigned long dwarf_regs_mask = live_regs_mask;
21268 saved_regs += bit_count (live_regs_mask) * 4;
21269 if (optimize_size && !frame_pointer_needed
21270 && saved_regs == offsets->saved_regs - offsets->saved_args)
21272 /* If no coprocessor registers are being pushed and we don't have
21273 to worry about a frame pointer then push extra registers to
21274 create the stack frame. This is done in a way that does not
21275 alter the frame layout, so is independent of the epilogue. */
21276 int n;
21277 int frame;
21278 n = 0;
21279 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21280 n++;
21281 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21282 if (frame && n * 4 >= frame)
21284 n = frame / 4;
21285 live_regs_mask |= (1 << n) - 1;
21286 saved_regs += frame;
21290 if (TARGET_LDRD
21291 && current_tune->prefer_ldrd_strd
21292 && !optimize_function_for_size_p (cfun))
21294 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21295 if (TARGET_THUMB2)
21296 thumb2_emit_strd_push (live_regs_mask);
21297 else if (TARGET_ARM
21298 && !TARGET_APCS_FRAME
21299 && !IS_INTERRUPT (func_type))
21300 arm_emit_strd_push (live_regs_mask);
21301 else
21303 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21304 RTX_FRAME_RELATED_P (insn) = 1;
21307 else
21309 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21310 RTX_FRAME_RELATED_P (insn) = 1;
21314 if (! IS_VOLATILE (func_type))
21315 saved_regs += arm_save_coproc_regs ();
21317 if (frame_pointer_needed && TARGET_ARM)
21319 /* Create the new frame pointer. */
21320 if (TARGET_APCS_FRAME)
21322 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21323 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21324 RTX_FRAME_RELATED_P (insn) = 1;
21326 if (IS_NESTED (func_type))
21328 /* Recover the static chain register. */
21329 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21330 insn = gen_rtx_REG (SImode, 3);
21331 else
21333 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21334 insn = gen_frame_mem (SImode, insn);
21336 emit_set_insn (ip_rtx, insn);
21337 /* Add a USE to stop propagate_one_insn() from barfing. */
21338 emit_insn (gen_force_register_use (ip_rtx));
21341 else
21343 insn = GEN_INT (saved_regs - 4);
21344 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21345 stack_pointer_rtx, insn));
21346 RTX_FRAME_RELATED_P (insn) = 1;
21350 if (flag_stack_usage_info)
21351 current_function_static_stack_size
21352 = offsets->outgoing_args - offsets->saved_args;
21354 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21356 /* This add can produce multiple insns for a large constant, so we
21357 need to get tricky. */
21358 rtx_insn *last = get_last_insn ();
21360 amount = GEN_INT (offsets->saved_args + saved_regs
21361 - offsets->outgoing_args);
21363 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21364 amount));
21367 last = last ? NEXT_INSN (last) : get_insns ();
21368 RTX_FRAME_RELATED_P (last) = 1;
21370 while (last != insn);
21372 /* If the frame pointer is needed, emit a special barrier that
21373 will prevent the scheduler from moving stores to the frame
21374 before the stack adjustment. */
21375 if (frame_pointer_needed)
21376 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21377 hard_frame_pointer_rtx));
21381 if (frame_pointer_needed && TARGET_THUMB2)
21382 thumb_set_frame_pointer (offsets);
21384 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21386 unsigned long mask;
21388 mask = live_regs_mask;
21389 mask &= THUMB2_WORK_REGS;
21390 if (!IS_NESTED (func_type))
21391 mask |= (1 << IP_REGNUM);
21392 arm_load_pic_register (mask);
21395 /* If we are profiling, make sure no instructions are scheduled before
21396 the call to mcount. Similarly if the user has requested no
21397 scheduling in the prolog. Similarly if we want non-call exceptions
21398 using the EABI unwinder, to prevent faulting instructions from being
21399 swapped with a stack adjustment. */
21400 if (crtl->profile || !TARGET_SCHED_PROLOG
21401 || (arm_except_unwind_info (&global_options) == UI_TARGET
21402 && cfun->can_throw_non_call_exceptions))
21403 emit_insn (gen_blockage ());
21405 /* If the link register is being kept alive, with the return address in it,
21406 then make sure that it does not get reused by the ce2 pass. */
21407 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21408 cfun->machine->lr_save_eliminated = 1;
21411 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21412 static void
21413 arm_print_condition (FILE *stream)
21415 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21417 /* Branch conversion is not implemented for Thumb-2. */
21418 if (TARGET_THUMB)
21420 output_operand_lossage ("predicated Thumb instruction");
21421 return;
21423 if (current_insn_predicate != NULL)
21425 output_operand_lossage
21426 ("predicated instruction in conditional sequence");
21427 return;
21430 fputs (arm_condition_codes[arm_current_cc], stream);
21432 else if (current_insn_predicate)
21434 enum arm_cond_code code;
21436 if (TARGET_THUMB1)
21438 output_operand_lossage ("predicated Thumb instruction");
21439 return;
21442 code = get_arm_condition_code (current_insn_predicate);
21443 fputs (arm_condition_codes[code], stream);
21448 /* Globally reserved letters: acln
21449 Punctuation letters currently used: @_|?().!#
21450 Lower case letters currently used: bcdefhimpqtvwxyz
21451 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21452 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21454 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21456 If CODE is 'd', then the X is a condition operand and the instruction
21457 should only be executed if the condition is true.
21458 If CODE is 'D', then the X is a condition operand and the instruction
21459 should only be executed if the condition is false: however, if the mode
21460 of the comparison is CCFPEmode, then always execute the instruction -- we
21461 do this because in these circumstances !GE does not necessarily imply LT;
21462 in these cases the instruction pattern will take care to make sure that
21463 an instruction containing %d will follow, thereby undoing the effects of
21464 doing this instruction unconditionally.
21465 If CODE is 'N' then X is a floating point operand that must be negated
21466 before output.
21467 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21468 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
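/* Illustrative examples (added for clarity, not part of the original
   source), based on the cases below: "%B" applied to (const_int 5)
   prints -6 (the bitwise inverse), and "%M" applied to a DImode value
   held in r4 prints the ldm/stm style list "{r4-r5}".  */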
21469 static void
21470 arm_print_operand (FILE *stream, rtx x, int code)
21472 switch (code)
21474 case '@':
21475 fputs (ASM_COMMENT_START, stream);
21476 return;
21478 case '_':
21479 fputs (user_label_prefix, stream);
21480 return;
21482 case '|':
21483 fputs (REGISTER_PREFIX, stream);
21484 return;
21486 case '?':
21487 arm_print_condition (stream);
21488 return;
21490 case '(':
21491 /* Nothing in unified syntax, otherwise the current condition code. */
21492 if (!TARGET_UNIFIED_ASM)
21493 arm_print_condition (stream);
21494 break;
21496 case ')':
21497 /* The current condition code in unified syntax, otherwise nothing. */
21498 if (TARGET_UNIFIED_ASM)
21499 arm_print_condition (stream);
21500 break;
21502 case '.':
21503 /* The current condition code for a condition code setting instruction.
21504 Preceded by 's' in unified syntax, otherwise followed by 's'. */
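/* Sketch (assuming the current condition is EQ): with the code below, a
   template such as "add%.\t..." would come out as "addseq ..." under
   unified syntax and as "addeqs ..." under divided syntax.  */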
21505 if (TARGET_UNIFIED_ASM)
21507 fputc('s', stream);
21508 arm_print_condition (stream);
21510 else
21512 arm_print_condition (stream);
21513 fputc('s', stream);
21515 return;
21517 case '!':
21518 /* If the instruction is conditionally executed then print
21519 the current condition code, otherwise print 's'. */
21520 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21521 if (current_insn_predicate)
21522 arm_print_condition (stream);
21523 else
21524 fputc('s', stream);
21525 break;
21527 /* %# is a "break" sequence. It doesn't output anything, but is used to
21528 separate e.g. operand numbers from following text, if that text consists
21529 of further digits which we don't want to be part of the operand
21530 number. */
21531 case '#':
21532 return;
21534 case 'N':
21536 REAL_VALUE_TYPE r;
21537 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21538 r = real_value_negate (&r);
21539 fprintf (stream, "%s", fp_const_from_val (&r));
21541 return;
21543 /* An integer or symbol address without a preceding # sign. */
21544 case 'c':
21545 switch (GET_CODE (x))
21547 case CONST_INT:
21548 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21549 break;
21551 case SYMBOL_REF:
21552 output_addr_const (stream, x);
21553 break;
21555 case CONST:
21556 if (GET_CODE (XEXP (x, 0)) == PLUS
21557 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21559 output_addr_const (stream, x);
21560 break;
21562 /* Fall through. */
21564 default:
21565 output_operand_lossage ("Unsupported operand for code '%c'", code);
21567 return;
21569 /* An integer that we want to print in HEX. */
21570 case 'x':
21571 switch (GET_CODE (x))
21573 case CONST_INT:
21574 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21575 break;
21577 default:
21578 output_operand_lossage ("Unsupported operand for code '%c'", code);
21580 return;
21582 case 'B':
21583 if (CONST_INT_P (x))
21585 HOST_WIDE_INT val;
21586 val = ARM_SIGN_EXTEND (~INTVAL (x));
21587 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21589 else
21591 putc ('~', stream);
21592 output_addr_const (stream, x);
21594 return;
21596 case 'b':
21597 /* Print the log2 of a CONST_INT. */
21599 HOST_WIDE_INT val;
21601 if (!CONST_INT_P (x)
21602 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21603 output_operand_lossage ("Unsupported operand for code '%c'", code);
21604 else
21605 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21607 return;
21609 case 'L':
21610 /* The low 16 bits of an immediate constant. */
21611 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21612 return;
21614 case 'i':
21615 fprintf (stream, "%s", arithmetic_instr (x, 1));
21616 return;
21618 case 'I':
21619 fprintf (stream, "%s", arithmetic_instr (x, 0));
21620 return;
21622 case 'S':
21624 HOST_WIDE_INT val;
21625 const char *shift;
21627 shift = shift_op (x, &val);
21629 if (shift)
21631 fprintf (stream, ", %s ", shift);
21632 if (val == -1)
21633 arm_print_operand (stream, XEXP (x, 1), 0);
21634 else
21635 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21638 return;
21640 /* An explanation of the 'Q', 'R' and 'H' register operands:
21642 In a pair of registers containing a DI or DF value the 'Q'
21643 operand returns the register number of the register containing
21644 the least significant part of the value. The 'R' operand returns
21645 the register number of the register containing the most
21646 significant part of the value.
21648 The 'H' operand returns the higher of the two register numbers.
21649 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21650 same as the 'Q' operand, since the most significant part of the
21651 value is held in the lower number register. The reverse is true
21652 on systems where WORDS_BIG_ENDIAN is false.
21654 The purpose of these operands is to distinguish between cases
21655 where the endian-ness of the values is important (for example
21656 when they are added together), and cases where the endian-ness
21657 is irrelevant, but the order of register operations is important.
21658 For example when loading a value from memory into a register
21659 pair, the endian-ness does not matter. Provided that the value
21660 from the lower memory address is put into the lower numbered
21661 register, and the value from the higher address is put into the
21662 higher numbered register, the load will work regardless of whether
21663 the value being loaded is big-wordian or little-wordian. The
21664 order of the two register loads can matter however, if the address
21665 of the memory location is actually held in one of the registers
21666 being overwritten by the load.
21668 The 'Q' and 'R' constraints are also available for 64-bit
21669 constants. */
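/* Illustrative example (not from the original source): for a DImode
   value held in the pair r4/r5, '%Q' prints r4 and '%R' prints r5 when
   WORDS_BIG_ENDIAN is false, and the other way round when it is true;
   '%H' prints r5 in either case.  */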
21670 case 'Q':
21671 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21673 rtx part = gen_lowpart (SImode, x);
21674 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21675 return;
21678 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21680 output_operand_lossage ("invalid operand for code '%c'", code);
21681 return;
21684 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21685 return;
21687 case 'R':
21688 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21690 machine_mode mode = GET_MODE (x);
21691 rtx part;
21693 if (mode == VOIDmode)
21694 mode = DImode;
21695 part = gen_highpart_mode (SImode, mode, x);
21696 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21697 return;
21700 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21702 output_operand_lossage ("invalid operand for code '%c'", code);
21703 return;
21706 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21707 return;
21709 case 'H':
21710 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21712 output_operand_lossage ("invalid operand for code '%c'", code);
21713 return;
21716 asm_fprintf (stream, "%r", REGNO (x) + 1);
21717 return;
21719 case 'J':
21720 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21722 output_operand_lossage ("invalid operand for code '%c'", code);
21723 return;
21726 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21727 return;
21729 case 'K':
21730 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21732 output_operand_lossage ("invalid operand for code '%c'", code);
21733 return;
21736 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21737 return;
21739 case 'm':
21740 asm_fprintf (stream, "%r",
21741 REG_P (XEXP (x, 0))
21742 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21743 return;
21745 case 'M':
21746 asm_fprintf (stream, "{%r-%r}",
21747 REGNO (x),
21748 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21749 return;
21751 /* Like 'M', but writing doubleword vector registers, for use by Neon
21752 insns. */
21753 case 'h':
21755 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21756 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21757 if (numregs == 1)
21758 asm_fprintf (stream, "{d%d}", regno);
21759 else
21760 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21762 return;
21764 case 'd':
21765 /* CONST_TRUE_RTX means always -- that's the default. */
21766 if (x == const_true_rtx)
21767 return;
21769 if (!COMPARISON_P (x))
21771 output_operand_lossage ("invalid operand for code '%c'", code);
21772 return;
21775 fputs (arm_condition_codes[get_arm_condition_code (x)],
21776 stream);
21777 return;
21779 case 'D':
21780 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21781 want to do that. */
21782 if (x == const_true_rtx)
21784 output_operand_lossage ("instruction never executed");
21785 return;
21787 if (!COMPARISON_P (x))
21789 output_operand_lossage ("invalid operand for code '%c'", code);
21790 return;
21793 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21794 (get_arm_condition_code (x))],
21795 stream);
21796 return;
21798 case 's':
21799 case 'V':
21800 case 'W':
21801 case 'X':
21802 case 'Y':
21803 case 'Z':
21804 /* Former Maverick support, removed after GCC-4.7. */
21805 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21806 return;
21808 case 'U':
21809 if (!REG_P (x)
21810 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21811 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21812 /* Bad value for wCG register number. */
21814 output_operand_lossage ("invalid operand for code '%c'", code);
21815 return;
21818 else
21819 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21820 return;
21822 /* Print an iWMMXt control register name. */
21823 case 'w':
21824 if (!CONST_INT_P (x)
21825 || INTVAL (x) < 0
21826 || INTVAL (x) >= 16)
21827 /* Bad value for wC register number. */
21829 output_operand_lossage ("invalid operand for code '%c'", code);
21830 return;
21833 else
21835 static const char * wc_reg_names [16] =
21837 "wCID", "wCon", "wCSSF", "wCASF",
21838 "wC4", "wC5", "wC6", "wC7",
21839 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21840 "wC12", "wC13", "wC14", "wC15"
21843 fputs (wc_reg_names [INTVAL (x)], stream);
21845 return;
21847 /* Print the high single-precision register of a VFP double-precision
21848 register. */
21849 case 'p':
21851 machine_mode mode = GET_MODE (x);
21852 int regno;
21854 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21856 output_operand_lossage ("invalid operand for code '%c'", code);
21857 return;
21860 regno = REGNO (x);
21861 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21863 output_operand_lossage ("invalid operand for code '%c'", code);
21864 return;
21867 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21869 return;
21871 /* Print a VFP/Neon double precision or quad precision register name. */
21872 case 'P':
21873 case 'q':
21875 machine_mode mode = GET_MODE (x);
21876 int is_quad = (code == 'q');
21877 int regno;
21879 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21881 output_operand_lossage ("invalid operand for code '%c'", code);
21882 return;
21885 if (!REG_P (x)
21886 || !IS_VFP_REGNUM (REGNO (x)))
21888 output_operand_lossage ("invalid operand for code '%c'", code);
21889 return;
21892 regno = REGNO (x);
21893 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21894 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21896 output_operand_lossage ("invalid operand for code '%c'", code);
21897 return;
21900 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21901 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21903 return;
21905 /* These two codes print the low/high doubleword register of a Neon quad
21906 register, respectively. For pair-structure types, can also print
21907 low/high quadword registers. */
21908 case 'e':
21909 case 'f':
21911 machine_mode mode = GET_MODE (x);
21912 int regno;
21914 if ((GET_MODE_SIZE (mode) != 16
21915 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21917 output_operand_lossage ("invalid operand for code '%c'", code);
21918 return;
21921 regno = REGNO (x);
21922 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21924 output_operand_lossage ("invalid operand for code '%c'", code);
21925 return;
21928 if (GET_MODE_SIZE (mode) == 16)
21929 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21930 + (code == 'f' ? 1 : 0));
21931 else
21932 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21933 + (code == 'f' ? 1 : 0));
21935 return;
21937 /* Print a VFPv3 floating-point constant, represented as an integer
21938 index. */
21939 case 'G':
21941 int index = vfp3_const_double_index (x);
21942 gcc_assert (index != -1);
21943 fprintf (stream, "%d", index);
21945 return;
21947 /* Print bits representing opcode features for Neon.
21949 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21950 and polynomials as unsigned.
21952 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21954 Bit 2 is 1 for rounding functions, 0 otherwise. */
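/* Worked example (illustrative): an operand value of 5 (binary 101) has
   bit 0 set (signed), bit 1 clear (integer) and bit 2 set (rounding),
   so '%T' prints 's', '%F' prints 'i', '%t' prints 's' and '%O' prints
   "r".  */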
21956 /* Identify the type as 's', 'u', 'p' or 'f'. */
21957 case 'T':
21959 HOST_WIDE_INT bits = INTVAL (x);
21960 fputc ("uspf"[bits & 3], stream);
21962 return;
21964 /* Likewise, but signed and unsigned integers are both 'i'. */
21965 case 'F':
21967 HOST_WIDE_INT bits = INTVAL (x);
21968 fputc ("iipf"[bits & 3], stream);
21970 return;
21972 /* As for 'T', but emit 'u' instead of 'p'. */
21973 case 't':
21975 HOST_WIDE_INT bits = INTVAL (x);
21976 fputc ("usuf"[bits & 3], stream);
21978 return;
21980 /* Bit 2: rounding (vs none). */
21981 case 'O':
21983 HOST_WIDE_INT bits = INTVAL (x);
21984 fputs ((bits & 4) != 0 ? "r" : "", stream);
21986 return;
21988 /* Memory operand for vld1/vst1 instruction. */
21989 case 'A':
21991 rtx addr;
21992 bool postinc = FALSE;
21993 rtx postinc_reg = NULL;
21994 unsigned align, memsize, align_bits;
21996 gcc_assert (MEM_P (x));
21997 addr = XEXP (x, 0);
21998 if (GET_CODE (addr) == POST_INC)
22000 postinc = 1;
22001 addr = XEXP (addr, 0);
22003 if (GET_CODE (addr) == POST_MODIFY)
22005 postinc_reg = XEXP (XEXP (addr, 1), 1);
22006 addr = XEXP (addr, 0);
22008 asm_fprintf (stream, "[%r", REGNO (addr));
22010 /* We know the alignment of this access, so we can emit a hint in the
22011 instruction (for some alignments) as an aid to the memory subsystem
22012 of the target. */
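/* For instance (illustrative, assuming r0 is the base register): a
   16-byte access known to be 16-byte aligned is printed as "[r0:128]",
   while an access with no usable alignment guarantee gets no ":<bits>"
   hint and is printed as plain "[r0]".  */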
22013 align = MEM_ALIGN (x) >> 3;
22014 memsize = MEM_SIZE (x);
22016 /* Only certain alignment specifiers are supported by the hardware. */
22017 if (memsize == 32 && (align % 32) == 0)
22018 align_bits = 256;
22019 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22020 align_bits = 128;
22021 else if (memsize >= 8 && (align % 8) == 0)
22022 align_bits = 64;
22023 else
22024 align_bits = 0;
22026 if (align_bits != 0)
22027 asm_fprintf (stream, ":%d", align_bits);
22029 asm_fprintf (stream, "]");
22031 if (postinc)
22032 fputs("!", stream);
22033 if (postinc_reg)
22034 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22036 return;
22038 case 'C':
22040 rtx addr;
22042 gcc_assert (MEM_P (x));
22043 addr = XEXP (x, 0);
22044 gcc_assert (REG_P (addr));
22045 asm_fprintf (stream, "[%r]", REGNO (addr));
22047 return;
22049 /* Translate an S register number into a D register number and element index. */
22050 case 'y':
22052 machine_mode mode = GET_MODE (x);
22053 int regno;
22055 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22057 output_operand_lossage ("invalid operand for code '%c'", code);
22058 return;
22061 regno = REGNO (x);
22062 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22064 output_operand_lossage ("invalid operand for code '%c'", code);
22065 return;
22068 regno = regno - FIRST_VFP_REGNUM;
22069 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22071 return;
22073 case 'v':
22074 gcc_assert (CONST_DOUBLE_P (x));
22075 int result;
22076 result = vfp3_const_double_for_fract_bits (x);
22077 if (result == 0)
22078 result = vfp3_const_double_for_bits (x);
22079 fprintf (stream, "#%d", result);
22080 return;
22082 /* Register specifier for vld1.16/vst1.16. Translate the S register
22083 number into a D register number and element index. */
22084 case 'z':
22086 machine_mode mode = GET_MODE (x);
22087 int regno;
22089 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22095 regno = REGNO (x);
22096 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22098 output_operand_lossage ("invalid operand for code '%c'", code);
22099 return;
22102 regno = regno - FIRST_VFP_REGNUM;
22103 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22105 return;
22107 default:
22108 if (x == 0)
22110 output_operand_lossage ("missing operand");
22111 return;
22114 switch (GET_CODE (x))
22116 case REG:
22117 asm_fprintf (stream, "%r", REGNO (x));
22118 break;
22120 case MEM:
22121 output_memory_reference_mode = GET_MODE (x);
22122 output_address (XEXP (x, 0));
22123 break;
22125 case CONST_DOUBLE:
22127 char fpstr[20];
22128 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22129 sizeof (fpstr), 0, 1);
22130 fprintf (stream, "#%s", fpstr);
22132 break;
22134 default:
22135 gcc_assert (GET_CODE (x) != NEG);
22136 fputc ('#', stream);
22137 if (GET_CODE (x) == HIGH)
22139 fputs (":lower16:", stream);
22140 x = XEXP (x, 0);
22143 output_addr_const (stream, x);
22144 break;
22149 /* Target hook for printing a memory address. */
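/* Illustrative renderings (added for clarity, not part of the original
   source), for a 32-bit target and SImode accesses: a plain register
   base prints as "[r0]", base plus constant as "[r0, #4]", base plus a
   scaled index as "[r0, r1, lsl #2]" (lsl under unified syntax),
   pre-decrement as "[r0, #-4]!" and post-increment as "[r0], #4".  */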
22150 static void
22151 arm_print_operand_address (FILE *stream, rtx x)
22153 if (TARGET_32BIT)
22155 int is_minus = GET_CODE (x) == MINUS;
22157 if (REG_P (x))
22158 asm_fprintf (stream, "[%r]", REGNO (x));
22159 else if (GET_CODE (x) == PLUS || is_minus)
22161 rtx base = XEXP (x, 0);
22162 rtx index = XEXP (x, 1);
22163 HOST_WIDE_INT offset = 0;
22164 if (!REG_P (base)
22165 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22167 /* Ensure that BASE is a register. */
22168 /* (one of them must be). */
22169 /* Also ensure the SP is not used as an index register. */
22170 std::swap (base, index);
22172 switch (GET_CODE (index))
22174 case CONST_INT:
22175 offset = INTVAL (index);
22176 if (is_minus)
22177 offset = -offset;
22178 asm_fprintf (stream, "[%r, #%wd]",
22179 REGNO (base), offset);
22180 break;
22182 case REG:
22183 asm_fprintf (stream, "[%r, %s%r]",
22184 REGNO (base), is_minus ? "-" : "",
22185 REGNO (index));
22186 break;
22188 case MULT:
22189 case ASHIFTRT:
22190 case LSHIFTRT:
22191 case ASHIFT:
22192 case ROTATERT:
22194 asm_fprintf (stream, "[%r, %s%r",
22195 REGNO (base), is_minus ? "-" : "",
22196 REGNO (XEXP (index, 0)));
22197 arm_print_operand (stream, index, 'S');
22198 fputs ("]", stream);
22199 break;
22202 default:
22203 gcc_unreachable ();
22206 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22207 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22209 extern machine_mode output_memory_reference_mode;
22211 gcc_assert (REG_P (XEXP (x, 0)));
22213 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22214 asm_fprintf (stream, "[%r, #%s%d]!",
22215 REGNO (XEXP (x, 0)),
22216 GET_CODE (x) == PRE_DEC ? "-" : "",
22217 GET_MODE_SIZE (output_memory_reference_mode));
22218 else
22219 asm_fprintf (stream, "[%r], #%s%d",
22220 REGNO (XEXP (x, 0)),
22221 GET_CODE (x) == POST_DEC ? "-" : "",
22222 GET_MODE_SIZE (output_memory_reference_mode));
22224 else if (GET_CODE (x) == PRE_MODIFY)
22226 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22227 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22228 asm_fprintf (stream, "#%wd]!",
22229 INTVAL (XEXP (XEXP (x, 1), 1)));
22230 else
22231 asm_fprintf (stream, "%r]!",
22232 REGNO (XEXP (XEXP (x, 1), 1)));
22234 else if (GET_CODE (x) == POST_MODIFY)
22236 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22237 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22238 asm_fprintf (stream, "#%wd",
22239 INTVAL (XEXP (XEXP (x, 1), 1)));
22240 else
22241 asm_fprintf (stream, "%r",
22242 REGNO (XEXP (XEXP (x, 1), 1)));
22244 else output_addr_const (stream, x);
22246 else
22248 if (REG_P (x))
22249 asm_fprintf (stream, "[%r]", REGNO (x));
22250 else if (GET_CODE (x) == POST_INC)
22251 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22252 else if (GET_CODE (x) == PLUS)
22254 gcc_assert (REG_P (XEXP (x, 0)));
22255 if (CONST_INT_P (XEXP (x, 1)))
22256 asm_fprintf (stream, "[%r, #%wd]",
22257 REGNO (XEXP (x, 0)),
22258 INTVAL (XEXP (x, 1)));
22259 else
22260 asm_fprintf (stream, "[%r, %r]",
22261 REGNO (XEXP (x, 0)),
22262 REGNO (XEXP (x, 1)));
22264 else
22265 output_addr_const (stream, x);
22269 /* Target hook for indicating whether a punctuation character for
22270 TARGET_PRINT_OPERAND is valid. */
22271 static bool
22272 arm_print_operand_punct_valid_p (unsigned char code)
22274 return (code == '@' || code == '|' || code == '.'
22275 || code == '(' || code == ')' || code == '#'
22276 || (TARGET_32BIT && (code == '?'))
22277 || (TARGET_THUMB2 && (code == '!'))
22278 || (TARGET_THUMB && (code == '_')));
22281 /* Target hook for assembling integer objects. The ARM version needs to
22282 handle word-sized values specially. */
22283 static bool
22284 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22286 machine_mode mode;
22288 if (size == UNITS_PER_WORD && aligned_p)
22290 fputs ("\t.word\t", asm_out_file);
22291 output_addr_const (asm_out_file, x);
22293 /* Mark symbols as position independent. We only do this in the
22294 .text segment, not in the .data segment. */
22295 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22296 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22298 /* See legitimize_pic_address for an explanation of the
22299 TARGET_VXWORKS_RTP check. */
22300 if (!arm_pic_data_is_text_relative
22301 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22302 fputs ("(GOT)", asm_out_file);
22303 else
22304 fputs ("(GOTOFF)", asm_out_file);
22306 fputc ('\n', asm_out_file);
22307 return true;
22310 mode = GET_MODE (x);
22312 if (arm_vector_mode_supported_p (mode))
22314 int i, units;
22316 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22318 units = CONST_VECTOR_NUNITS (x);
22319 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22321 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22322 for (i = 0; i < units; i++)
22324 rtx elt = CONST_VECTOR_ELT (x, i);
22325 assemble_integer
22326 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22328 else
22329 for (i = 0; i < units; i++)
22331 rtx elt = CONST_VECTOR_ELT (x, i);
22332 REAL_VALUE_TYPE rval;
22334 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22336 assemble_real
22337 (rval, GET_MODE_INNER (mode),
22338 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22341 return true;
22344 return default_assemble_integer (x, size, aligned_p);
22347 static void
22348 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22350 section *s;
22352 if (!TARGET_AAPCS_BASED)
22354 (is_ctor ?
22355 default_named_section_asm_out_constructor
22356 : default_named_section_asm_out_destructor) (symbol, priority);
22357 return;
22360 /* Put these in the .init_array section, using a special relocation. */
22361 if (priority != DEFAULT_INIT_PRIORITY)
22363 char buf[18];
22364 sprintf (buf, "%s.%.5u",
22365 is_ctor ? ".init_array" : ".fini_array",
22366 priority);
22367 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22369 else if (is_ctor)
22370 s = ctors_section;
22371 else
22372 s = dtors_section;
22374 switch_to_section (s);
22375 assemble_align (POINTER_SIZE);
22376 fputs ("\t.word\t", asm_out_file);
22377 output_addr_const (asm_out_file, symbol);
22378 fputs ("(target1)\n", asm_out_file);
22381 /* Add a function to the list of static constructors. */
22383 static void
22384 arm_elf_asm_constructor (rtx symbol, int priority)
22386 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22389 /* Add a function to the list of static destructors. */
22391 static void
22392 arm_elf_asm_destructor (rtx symbol, int priority)
22394 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22397 /* A finite state machine takes care of noticing whether or not instructions
22398 can be conditionally executed, thus decreasing execution time and code
22399 size by deleting branch instructions. The fsm is controlled by
22400 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22402 /* The states of the fsm controlling condition codes are:
22403 0: normal, do nothing special
22404 1: make ASM_OUTPUT_OPCODE not output this instruction
22405 2: make ASM_OUTPUT_OPCODE not output this instruction
22406 3: make instructions conditional
22407 4: make instructions conditional
22409 State transitions (state->state by whom under condition):
22410 0 -> 1 final_prescan_insn if the `target' is a label
22411 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22412 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22413 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22414 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22415 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22416 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22417 (the target insn is arm_target_insn).
22419 If the jump clobbers the conditions then we use states 2 and 4.
22421 A similar thing can be done with conditional return insns.
22423 XXX In case the `target' is an unconditional branch, this conditionalising
22424 of the instructions always reduces code size, but not always execution
22425 time. But then, I want to reduce the code size to somewhere near what
22426 /bin/cc produces. */
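/* Illustrative sketch (not from the original source) of what the fsm
   achieves: a skipped-over sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is emitted instead as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the branch is suppressed and the skipped instruction is
   predicated with the inverse condition.  */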
22428 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22429 instructions. When a COND_EXEC instruction is seen the subsequent
22430 instructions are scanned so that multiple conditional instructions can be
22431 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22432 specify the length and true/false mask for the IT block. These will be
22433 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
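/* For example (illustrative, with placeholder mnemonics): three
   consecutive COND_EXEC insns predicated EQ, EQ and NE are emitted as a
   single block

	itte	eq
	addeq	...
	moveq	...
	subne	...

   with arm_condexec_mask holding the then/else pattern and
   arm_condexec_count the number of insns still to be output.  */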
22435 /* Returns the index of the ARM condition code string in
22436 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22437 COMPARISON should be an rtx like `(eq (...) (...))'. */
22439 enum arm_cond_code
22440 maybe_get_arm_condition_code (rtx comparison)
22442 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22443 enum arm_cond_code code;
22444 enum rtx_code comp_code = GET_CODE (comparison);
22446 if (GET_MODE_CLASS (mode) != MODE_CC)
22447 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22448 XEXP (comparison, 1));
22450 switch (mode)
22452 case CC_DNEmode: code = ARM_NE; goto dominance;
22453 case CC_DEQmode: code = ARM_EQ; goto dominance;
22454 case CC_DGEmode: code = ARM_GE; goto dominance;
22455 case CC_DGTmode: code = ARM_GT; goto dominance;
22456 case CC_DLEmode: code = ARM_LE; goto dominance;
22457 case CC_DLTmode: code = ARM_LT; goto dominance;
22458 case CC_DGEUmode: code = ARM_CS; goto dominance;
22459 case CC_DGTUmode: code = ARM_HI; goto dominance;
22460 case CC_DLEUmode: code = ARM_LS; goto dominance;
22461 case CC_DLTUmode: code = ARM_CC;
22463 dominance:
22464 if (comp_code == EQ)
22465 return ARM_INVERSE_CONDITION_CODE (code);
22466 if (comp_code == NE)
22467 return code;
22468 return ARM_NV;
22470 case CC_NOOVmode:
22471 switch (comp_code)
22473 case NE: return ARM_NE;
22474 case EQ: return ARM_EQ;
22475 case GE: return ARM_PL;
22476 case LT: return ARM_MI;
22477 default: return ARM_NV;
22480 case CC_Zmode:
22481 switch (comp_code)
22483 case NE: return ARM_NE;
22484 case EQ: return ARM_EQ;
22485 default: return ARM_NV;
22488 case CC_Nmode:
22489 switch (comp_code)
22491 case NE: return ARM_MI;
22492 case EQ: return ARM_PL;
22493 default: return ARM_NV;
22496 case CCFPEmode:
22497 case CCFPmode:
22498 /* We can handle all cases except UNEQ and LTGT. */
22499 switch (comp_code)
22501 case GE: return ARM_GE;
22502 case GT: return ARM_GT;
22503 case LE: return ARM_LS;
22504 case LT: return ARM_MI;
22505 case NE: return ARM_NE;
22506 case EQ: return ARM_EQ;
22507 case ORDERED: return ARM_VC;
22508 case UNORDERED: return ARM_VS;
22509 case UNLT: return ARM_LT;
22510 case UNLE: return ARM_LE;
22511 case UNGT: return ARM_HI;
22512 case UNGE: return ARM_PL;
22513 /* UNEQ and LTGT do not have a representation. */
22514 case UNEQ: /* Fall through. */
22515 case LTGT: /* Fall through. */
22516 default: return ARM_NV;
22519 case CC_SWPmode:
22520 switch (comp_code)
22522 case NE: return ARM_NE;
22523 case EQ: return ARM_EQ;
22524 case GE: return ARM_LE;
22525 case GT: return ARM_LT;
22526 case LE: return ARM_GE;
22527 case LT: return ARM_GT;
22528 case GEU: return ARM_LS;
22529 case GTU: return ARM_CC;
22530 case LEU: return ARM_CS;
22531 case LTU: return ARM_HI;
22532 default: return ARM_NV;
22535 case CC_Cmode:
22536 switch (comp_code)
22538 case LTU: return ARM_CS;
22539 case GEU: return ARM_CC;
22540 default: return ARM_NV;
22543 case CC_CZmode:
22544 switch (comp_code)
22546 case NE: return ARM_NE;
22547 case EQ: return ARM_EQ;
22548 case GEU: return ARM_CS;
22549 case GTU: return ARM_HI;
22550 case LEU: return ARM_LS;
22551 case LTU: return ARM_CC;
22552 default: return ARM_NV;
22555 case CC_NCVmode:
22556 switch (comp_code)
22558 case GE: return ARM_GE;
22559 case LT: return ARM_LT;
22560 case GEU: return ARM_CS;
22561 case LTU: return ARM_CC;
22562 default: return ARM_NV;
22565 case CCmode:
22566 switch (comp_code)
22568 case NE: return ARM_NE;
22569 case EQ: return ARM_EQ;
22570 case GE: return ARM_GE;
22571 case GT: return ARM_GT;
22572 case LE: return ARM_LE;
22573 case LT: return ARM_LT;
22574 case GEU: return ARM_CS;
22575 case GTU: return ARM_HI;
22576 case LEU: return ARM_LS;
22577 case LTU: return ARM_CC;
22578 default: return ARM_NV;
22581 default: gcc_unreachable ();
22585 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22586 static enum arm_cond_code
22587 get_arm_condition_code (rtx comparison)
22589 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22590 gcc_assert (code != ARM_NV);
22591 return code;
22594 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22595 instructions. */
22596 void
22597 thumb2_final_prescan_insn (rtx_insn *insn)
22599 rtx_insn *first_insn = insn;
22600 rtx body = PATTERN (insn);
22601 rtx predicate;
22602 enum arm_cond_code code;
22603 int n;
22604 int mask;
22605 int max;
22607 /* max_insns_skipped in the tune was already taken into account in the
22608 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22609 just emit the IT blocks as large as we can. It does not make sense to split
22610 the IT blocks. */
22611 max = MAX_INSN_PER_IT_BLOCK;
22613 /* Remove the previous insn from the count of insns to be output. */
22614 if (arm_condexec_count)
22615 arm_condexec_count--;
22617 /* Nothing to do if we are already inside a conditional block. */
22618 if (arm_condexec_count)
22619 return;
22621 if (GET_CODE (body) != COND_EXEC)
22622 return;
22624 /* Conditional jumps are implemented directly. */
22625 if (JUMP_P (insn))
22626 return;
22628 predicate = COND_EXEC_TEST (body);
22629 arm_current_cc = get_arm_condition_code (predicate);
22631 n = get_attr_ce_count (insn);
22632 arm_condexec_count = 1;
22633 arm_condexec_mask = (1 << n) - 1;
22634 arm_condexec_masklen = n;
22635 /* See if subsequent instructions can be combined into the same block. */
22636 for (;;)
22638 insn = next_nonnote_insn (insn);
22640 /* Jumping into the middle of an IT block is illegal, so a label or
22641 barrier terminates the block. */
22642 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22643 break;
22645 body = PATTERN (insn);
22646 /* USE and CLOBBER aren't really insns, so just skip them. */
22647 if (GET_CODE (body) == USE
22648 || GET_CODE (body) == CLOBBER)
22649 continue;
22651 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22652 if (GET_CODE (body) != COND_EXEC)
22653 break;
22654 /* Maximum number of conditionally executed instructions in a block. */
22655 n = get_attr_ce_count (insn);
22656 if (arm_condexec_masklen + n > max)
22657 break;
22659 predicate = COND_EXEC_TEST (body);
22660 code = get_arm_condition_code (predicate);
22661 mask = (1 << n) - 1;
22662 if (arm_current_cc == code)
22663 arm_condexec_mask |= (mask << arm_condexec_masklen);
22664 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22665 break;
22667 arm_condexec_count++;
22668 arm_condexec_masklen += n;
22670 /* A jump must be the last instruction in a conditional block. */
22671 if (JUMP_P (insn))
22672 break;
22674 /* Restore recog_data (getting the attributes of other insns can
22675 destroy this array, but final.c assumes that it remains intact
22676 across this call). */
22677 extract_constrain_insn_cached (first_insn);
22680 void
22681 arm_final_prescan_insn (rtx_insn *insn)
22683 /* BODY will hold the body of INSN. */
22684 rtx body = PATTERN (insn);
22686 /* This will be 1 if trying to repeat the trick, and things need to be
22687 reversed if it appears to fail. */
22688 int reverse = 0;
22690 /* If we start with a return insn, we only succeed if we find another one. */
22691 int seeking_return = 0;
22692 enum rtx_code return_code = UNKNOWN;
22694 /* START_INSN will hold the insn from where we start looking. This is the
22695 first insn after the following code_label if REVERSE is true. */
22696 rtx_insn *start_insn = insn;
22698 /* If in state 4, check if the target branch is reached, in order to
22699 change back to state 0. */
22700 if (arm_ccfsm_state == 4)
22702 if (insn == arm_target_insn)
22704 arm_target_insn = NULL;
22705 arm_ccfsm_state = 0;
22707 return;
22710 /* If in state 3, it is possible to repeat the trick, if this insn is an
22711 unconditional branch to a label, and immediately following this branch
22712 is the previous target label which is only used once, and the label this
22713 branch jumps to is not too far off. */
22714 if (arm_ccfsm_state == 3)
22716 if (simplejump_p (insn))
22718 start_insn = next_nonnote_insn (start_insn);
22719 if (BARRIER_P (start_insn))
22721 /* XXX Isn't this always a barrier? */
22722 start_insn = next_nonnote_insn (start_insn);
22724 if (LABEL_P (start_insn)
22725 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22726 && LABEL_NUSES (start_insn) == 1)
22727 reverse = TRUE;
22728 else
22729 return;
22731 else if (ANY_RETURN_P (body))
22733 start_insn = next_nonnote_insn (start_insn);
22734 if (BARRIER_P (start_insn))
22735 start_insn = next_nonnote_insn (start_insn);
22736 if (LABEL_P (start_insn)
22737 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22738 && LABEL_NUSES (start_insn) == 1)
22740 reverse = TRUE;
22741 seeking_return = 1;
22742 return_code = GET_CODE (body);
22744 else
22745 return;
22747 else
22748 return;
22751 gcc_assert (!arm_ccfsm_state || reverse);
22752 if (!JUMP_P (insn))
22753 return;
22755 /* This jump might be paralleled with a clobber of the condition codes;
22756 the jump should always come first. */
22757 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22758 body = XVECEXP (body, 0, 0);
22760 if (reverse
22761 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22762 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22764 int insns_skipped;
22765 int fail = FALSE, succeed = FALSE;
22766 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22767 int then_not_else = TRUE;
22768 rtx_insn *this_insn = start_insn;
22769 rtx label = 0;
22771 /* Register the insn jumped to. */
22772 if (reverse)
22774 if (!seeking_return)
22775 label = XEXP (SET_SRC (body), 0);
22777 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22778 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22779 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22781 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22782 then_not_else = FALSE;
22784 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22786 seeking_return = 1;
22787 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22789 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22791 seeking_return = 1;
22792 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22793 then_not_else = FALSE;
22795 else
22796 gcc_unreachable ();
22798 /* See how many insns this branch skips, and what kind of insns. If all
22799 insns are okay, and the label or unconditional branch to the same
22800 label is not too far away, succeed. */
22801 for (insns_skipped = 0;
22802 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22804 rtx scanbody;
22806 this_insn = next_nonnote_insn (this_insn);
22807 if (!this_insn)
22808 break;
22810 switch (GET_CODE (this_insn))
22812 case CODE_LABEL:
22813 /* Succeed if it is the target label, otherwise fail since
22814 control falls in from somewhere else. */
22815 if (this_insn == label)
22817 arm_ccfsm_state = 1;
22818 succeed = TRUE;
22820 else
22821 fail = TRUE;
22822 break;
22824 case BARRIER:
22825 /* Succeed if the following insn is the target label.
22826 Otherwise fail.
22827 If return insns are used then the last insn in a function
22828 will be a barrier. */
22829 this_insn = next_nonnote_insn (this_insn);
22830 if (this_insn && this_insn == label)
22832 arm_ccfsm_state = 1;
22833 succeed = TRUE;
22835 else
22836 fail = TRUE;
22837 break;
22839 case CALL_INSN:
22840 /* The AAPCS says that conditional calls should not be
22841 used since they make interworking inefficient (the
22842 linker can't transform BL<cond> into BLX). That's
22843 only a problem if the machine has BLX. */
22844 if (arm_arch5)
22846 fail = TRUE;
22847 break;
22850 /* Succeed if the following insn is the target label, or
22851 if the following two insns are a barrier and the
22852 target label. */
22853 this_insn = next_nonnote_insn (this_insn);
22854 if (this_insn && BARRIER_P (this_insn))
22855 this_insn = next_nonnote_insn (this_insn);
22857 if (this_insn && this_insn == label
22858 && insns_skipped < max_insns_skipped)
22860 arm_ccfsm_state = 1;
22861 succeed = TRUE;
22863 else
22864 fail = TRUE;
22865 break;
22867 case JUMP_INSN:
22868 /* If this is an unconditional branch to the same label, succeed.
22869 If it is to another label, do nothing. If it is conditional,
22870 fail. */
22871 /* XXX Probably, the tests for SET and the PC are
22872 unnecessary. */
22874 scanbody = PATTERN (this_insn);
22875 if (GET_CODE (scanbody) == SET
22876 && GET_CODE (SET_DEST (scanbody)) == PC)
22878 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22879 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22881 arm_ccfsm_state = 2;
22882 succeed = TRUE;
22884 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22885 fail = TRUE;
22887 /* Fail if a conditional return is undesirable (e.g. on a
22888 StrongARM), but still allow this if optimizing for size. */
22889 else if (GET_CODE (scanbody) == return_code
22890 && !use_return_insn (TRUE, NULL)
22891 && !optimize_size)
22892 fail = TRUE;
22893 else if (GET_CODE (scanbody) == return_code)
22895 arm_ccfsm_state = 2;
22896 succeed = TRUE;
22898 else if (GET_CODE (scanbody) == PARALLEL)
22900 switch (get_attr_conds (this_insn))
22902 case CONDS_NOCOND:
22903 break;
22904 default:
22905 fail = TRUE;
22906 break;
22909 else
22910 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22912 break;
22914 case INSN:
22915 /* Instructions using or affecting the condition codes make it
22916 fail. */
22917 scanbody = PATTERN (this_insn);
22918 if (!(GET_CODE (scanbody) == SET
22919 || GET_CODE (scanbody) == PARALLEL)
22920 || get_attr_conds (this_insn) != CONDS_NOCOND)
22921 fail = TRUE;
22922 break;
22924 default:
22925 break;
22928 if (succeed)
22930 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22931 arm_target_label = CODE_LABEL_NUMBER (label);
22932 else
22934 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22936 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22938 this_insn = next_nonnote_insn (this_insn);
22939 gcc_assert (!this_insn
22940 || (!BARRIER_P (this_insn)
22941 && !LABEL_P (this_insn)));
22943 if (!this_insn)
22945 /* Oh, dear! We ran off the end... give up. */
22946 extract_constrain_insn_cached (insn);
22947 arm_ccfsm_state = 0;
22948 arm_target_insn = NULL;
22949 return;
22951 arm_target_insn = this_insn;
22954 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22955 what it was. */
22956 if (!reverse)
22957 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22959 if (reverse || then_not_else)
22960 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22963 /* Restore recog_data (getting the attributes of other insns can
22964 destroy this array, but final.c assumes that it remains intact
22965 across this call). */
22966 extract_constrain_insn_cached (insn);
22970 /* Output IT instructions. */
22971 void
22972 thumb2_asm_output_opcode (FILE * stream)
22974 char buff[5];
22975 int n;
22977 if (arm_condexec_mask)
22979 for (n = 0; n < arm_condexec_masklen; n++)
22980 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22981 buff[n] = 0;
22982 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22983 arm_condition_codes[arm_current_cc]);
22984 arm_condexec_mask = 0;
22988 /* Returns true if REGNO is a valid register
22989 for holding a quantity of type MODE. */
22991 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22993 if (GET_MODE_CLASS (mode) == MODE_CC)
22994 return (regno == CC_REGNUM
22995 || (TARGET_HARD_FLOAT && TARGET_VFP
22996 && regno == VFPCC_REGNUM));
22998 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22999 return false;
23001 if (TARGET_THUMB1)
23002 /* For the Thumb we only allow values bigger than SImode in
23003 registers 0 - 6, so that there is always a second low
23004 register available to hold the upper part of the value.
23005 We probably ought to ensure that the register is the
23006 start of an even numbered register pair. */
23007 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23009 if (TARGET_HARD_FLOAT && TARGET_VFP
23010 && IS_VFP_REGNUM (regno))
23012 if (mode == SFmode || mode == SImode)
23013 return VFP_REGNO_OK_FOR_SINGLE (regno);
23015 if (mode == DFmode)
23016 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23018 /* VFP registers can hold HFmode values, but there is no point in
23019 putting them there unless we have hardware conversion insns. */
23020 if (mode == HFmode)
23021 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23023 if (TARGET_NEON)
23024 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23025 || (VALID_NEON_QREG_MODE (mode)
23026 && NEON_REGNO_OK_FOR_QUAD (regno))
23027 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23028 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23029 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23030 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23031 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23033 return FALSE;
23036 if (TARGET_REALLY_IWMMXT)
23038 if (IS_IWMMXT_GR_REGNUM (regno))
23039 return mode == SImode;
23041 if (IS_IWMMXT_REGNUM (regno))
23042 return VALID_IWMMXT_REG_MODE (mode);
23045 /* We allow almost any value to be stored in the general registers.
23046 Restrict doubleword quantities to even register pairs in ARM state
23047 so that we can use ldrd. Do not allow very large Neon structure
23048 opaque modes in general registers; they would use too many. */
23049 if (regno <= LAST_ARM_REGNUM)
23051 if (ARM_NUM_REGS (mode) > 4)
23052 return FALSE;
23054 if (TARGET_THUMB2)
23055 return TRUE;
23057 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23060 if (regno == FRAME_POINTER_REGNUM
23061 || regno == ARG_POINTER_REGNUM)
23062 /* We only allow integers in the fake hard registers. */
23063 return GET_MODE_CLASS (mode) == MODE_INT;
23065 return FALSE;
23068 /* Implement MODES_TIEABLE_P. */
23070 bool
23071 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23073 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23074 return true;
23076 /* We specifically want to allow elements of "structure" modes to
23077 be tieable to the structure. This more general condition allows
23078 other rarer situations too. */
23079 if (TARGET_NEON
23080 && (VALID_NEON_DREG_MODE (mode1)
23081 || VALID_NEON_QREG_MODE (mode1)
23082 || VALID_NEON_STRUCT_MODE (mode1))
23083 && (VALID_NEON_DREG_MODE (mode2)
23084 || VALID_NEON_QREG_MODE (mode2)
23085 || VALID_NEON_STRUCT_MODE (mode2)))
23086 return true;
23088 return false;
23091 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23092 not used in arm mode. */
23094 enum reg_class
23095 arm_regno_class (int regno)
23097 if (regno == PC_REGNUM)
23098 return NO_REGS;
23100 if (TARGET_THUMB1)
23102 if (regno == STACK_POINTER_REGNUM)
23103 return STACK_REG;
23104 if (regno == CC_REGNUM)
23105 return CC_REG;
23106 if (regno < 8)
23107 return LO_REGS;
23108 return HI_REGS;
23111 if (TARGET_THUMB2 && regno < 8)
23112 return LO_REGS;
23114 if ( regno <= LAST_ARM_REGNUM
23115 || regno == FRAME_POINTER_REGNUM
23116 || regno == ARG_POINTER_REGNUM)
23117 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23119 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23120 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23122 if (IS_VFP_REGNUM (regno))
23124 if (regno <= D7_VFP_REGNUM)
23125 return VFP_D0_D7_REGS;
23126 else if (regno <= LAST_LO_VFP_REGNUM)
23127 return VFP_LO_REGS;
23128 else
23129 return VFP_HI_REGS;
23132 if (IS_IWMMXT_REGNUM (regno))
23133 return IWMMXT_REGS;
23135 if (IS_IWMMXT_GR_REGNUM (regno))
23136 return IWMMXT_GR_REGS;
23138 return NO_REGS;
23141 /* Handle a special case when computing the offset
23142 of an argument from the frame pointer. */
23144 arm_debugger_arg_offset (int value, rtx addr)
23146 rtx_insn *insn;
23148 /* We are only interested if dbxout_parms() failed to compute the offset. */
23149 if (value != 0)
23150 return 0;
23152 /* We can only cope with the case where the address is held in a register. */
23153 if (!REG_P (addr))
23154 return 0;
23156 /* If we are using the frame pointer to point at the argument, then
23157 an offset of 0 is correct. */
23158 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23159 return 0;
23161 /* If we are using the stack pointer to point at the
23162 argument, then an offset of 0 is correct. */
23163 /* ??? Check this is consistent with thumb2 frame layout. */
23164 if ((TARGET_THUMB || !frame_pointer_needed)
23165 && REGNO (addr) == SP_REGNUM)
23166 return 0;
23168 /* Oh dear. The argument is pointed to by a register rather
23169 than being held in a register, or being stored at a known
23170 offset from the frame pointer. Since GDB only understands
23171 those two kinds of argument we must translate the address
23172 held in the register into an offset from the frame pointer.
23173 We do this by searching through the insns for the function
23174 looking to see where this register gets its value. If the
23175 register is initialized from the frame pointer plus an offset
23176 then we are in luck and we can continue, otherwise we give up.
23178 This code is exercised by producing debugging information
23179 for a function with arguments like this:
23181 double func (double a, double b, int c, double d) {return d;}
23183 Without this code the stab for parameter 'd' will be set to
23184 an offset of 0 from the frame pointer, rather than 8. */
23186 /* The if() statement says:
23188 If the insn is a normal instruction
23189 and if the insn is setting the value in a register
23190 and if the register being set is the register holding the address of the argument
23191 and if the address is computed by an addition
23192 that involves adding to a register
23193 which is the frame pointer
23194 a constant integer
23196 then... */
23198 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23200 if ( NONJUMP_INSN_P (insn)
23201 && GET_CODE (PATTERN (insn)) == SET
23202 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23203 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23204 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23205 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23206 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23209 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23211 break;
23215 if (value == 0)
23217 debug_rtx (addr);
23218 warning (0, "unable to compute real location of stacked parameter");
23219 value = 8; /* XXX magic hack */
23222 return value;
23225 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23227 static const char *
23228 arm_invalid_parameter_type (const_tree t)
23230 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23231 return N_("function parameters cannot have __fp16 type");
23232 return NULL;
23235 /* Implement TARGET_INVALID_RETURN_TYPE. */
23237 static const char *
23238 arm_invalid_return_type (const_tree t)
23240 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23241 return N_("functions cannot return __fp16 type");
23242 return NULL;
23245 /* Implement TARGET_PROMOTED_TYPE. */
23247 static tree
23248 arm_promoted_type (const_tree t)
23250 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23251 return float_type_node;
23252 return NULL_TREE;
23255 /* Implement TARGET_CONVERT_TO_TYPE.
23256 Specifically, this hook implements the peculiarity of the ARM
23257 half-precision floating-point C semantics that requires conversions between
23258 __fp16 and double to go via an intermediate conversion to float. */
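/* Worked example (illustrative): with this hook in place a conversion
   such as

     __fp16 h = ...;
     double d = h;

   is expanded as (double) (float) h, i.e. the value is widened to float
   first and only then to double, as required by the semantics described
   above.  The same applies in the narrowing direction.  */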
23260 static tree
23261 arm_convert_to_type (tree type, tree expr)
23263 tree fromtype = TREE_TYPE (expr);
23264 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23265 return NULL_TREE;
23266 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23267 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23268 return convert (type, convert (float_type_node, expr));
23269 return NULL_TREE;
23272 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23273 This simply adds HFmode as a supported mode; even though we don't
23274 implement arithmetic on this type directly, it's supported by
23275 optabs conversions, much the way the double-word arithmetic is
23276 special-cased in the default hook. */
23278 static bool
23279 arm_scalar_mode_supported_p (machine_mode mode)
23281 if (mode == HFmode)
23282 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23283 else if (ALL_FIXED_POINT_MODE_P (mode))
23284 return true;
23285 else
23286 return default_scalar_mode_supported_p (mode);
23289 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23290 void
23291 neon_reinterpret (rtx dest, rtx src)
23293 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23296 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23297 not to early-clobber SRC registers in the process.
23299 We assume that the operands described by SRC and DEST represent a
23300 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23301 number of components into which the copy has been decomposed. */
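/* Illustrative example (not from the original source): copying a
   two-component value from {d2, d3} to {d3, d4} must move d3 into d4
   before moving d2 into d3; because the destination overlaps the source
   and has the higher register number, the code below emits the
   components in reverse order so that no source register is clobbered
   before it has been read.  */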
23302 void
23303 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23305 unsigned int i;
23307 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23308 || REGNO (operands[0]) < REGNO (operands[1]))
23310 for (i = 0; i < count; i++)
23312 operands[2 * i] = dest[i];
23313 operands[2 * i + 1] = src[i];
23316 else
23318 for (i = 0; i < count; i++)
23320 operands[2 * i] = dest[count - i - 1];
23321 operands[2 * i + 1] = src[count - i - 1];
23326 /* Split operands into moves from op[1] + op[2] into op[0]. */
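/* For instance (illustrative): combining d0 and d1, in that order, into
   q0 needs no move at all, since q0 already consists of d0 and d1;
   combining d1 and d0 into q0 reduces to a single VSWP of d0 and d1;
   the general case emits the two halves in whichever order avoids
   clobbering a source register that is still needed.  */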
23328 void
23329 neon_split_vcombine (rtx operands[3])
23331 unsigned int dest = REGNO (operands[0]);
23332 unsigned int src1 = REGNO (operands[1]);
23333 unsigned int src2 = REGNO (operands[2]);
23334 machine_mode halfmode = GET_MODE (operands[1]);
23335 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23336 rtx destlo, desthi;
23338 if (src1 == dest && src2 == dest + halfregs)
23340 /* No-op move. Can't split to nothing; emit something. */
23341 emit_note (NOTE_INSN_DELETED);
23342 return;
23345 /* Preserve register attributes for variable tracking. */
23346 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23347 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23348 GET_MODE_SIZE (halfmode));
23350 /* Special case of reversed high/low parts. Use VSWP. */
23351 if (src2 == dest && src1 == dest + halfregs)
23353 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23354 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23355 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23356 return;
23359 if (!reg_overlap_mentioned_p (operands[2], destlo))
23361 /* Try to avoid unnecessary moves if part of the result
23362 is in the right place already. */
23363 if (src1 != dest)
23364 emit_move_insn (destlo, operands[1]);
23365 if (src2 != dest + halfregs)
23366 emit_move_insn (desthi, operands[2]);
23368 else
23370 if (src2 != dest + halfregs)
23371 emit_move_insn (desthi, operands[2]);
23372 if (src1 != dest)
23373 emit_move_insn (destlo, operands[1]);
23377 /* Return the number (counting from 0) of
23378 the least significant set bit in MASK. */
23380 inline static int
23381 number_of_first_bit_set (unsigned mask)
23383 return ctz_hwi (mask);
23386 /* Like emit_multi_reg_push, but allowing for a different set of
23387 registers to be described as saved. MASK is the set of registers
23388 to be saved; REAL_REGS is the set of registers to be described as
23389 saved. If REAL_REGS is 0, only describe the stack adjustment. */
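/* Sketch of the intended use (an assumption for illustration): the
   Thumb-1 push instruction can only store low registers, so a high
   register such as r8 may first be copied into a spare low register and
   that low register pushed; MASK then names the low register actually
   stored while REAL_REGS names r8, so the unwind information still
   describes a save of r8.  */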
23391 static rtx_insn *
23392 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23394 unsigned long regno;
23395 rtx par[10], tmp, reg;
23396 rtx_insn *insn;
23397 int i, j;
23399 /* Build the parallel of the registers actually being stored. */
23400 for (i = 0; mask; ++i, mask &= mask - 1)
23402 regno = ctz_hwi (mask);
23403 reg = gen_rtx_REG (SImode, regno);
23405 if (i == 0)
23406 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23407 else
23408 tmp = gen_rtx_USE (VOIDmode, reg);
23410 par[i] = tmp;
23413 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23414 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23415 tmp = gen_frame_mem (BLKmode, tmp);
23416 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23417 par[0] = tmp;
23419 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23420 insn = emit_insn (tmp);
23422 /* Always build the stack adjustment note for unwind info. */
23423 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23424 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23425 par[0] = tmp;
23427 /* Build the parallel of the registers recorded as saved for unwind. */
23428 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23430 regno = ctz_hwi (real_regs);
23431 reg = gen_rtx_REG (SImode, regno);
23433 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23434 tmp = gen_frame_mem (SImode, tmp);
23435 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23436 RTX_FRAME_RELATED_P (tmp) = 1;
23437 par[j + 1] = tmp;
23440 if (j == 0)
23441 tmp = par[0];
23442 else
23444 RTX_FRAME_RELATED_P (par[0]) = 1;
23445 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23448 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23450 return insn;
23453 /* Emit code to push or pop registers to or from the stack. F is the
23454 assembly file. MASK is the registers to pop. */
23455 static void
23456 thumb_pop (FILE *f, unsigned long mask)
23458 int regno;
23459 int lo_mask = mask & 0xFF;
23460 int pushed_words = 0;
23462 gcc_assert (mask);
23464 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23466 /* Special case. Do not generate a POP PC statement here, do it in
23467 thumb_exit() */
23468 thumb_exit (f, -1);
23469 return;
23472 fprintf (f, "\tpop\t{");
23474 /* Look at the low registers first. */
23475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23477 if (lo_mask & 1)
23479 asm_fprintf (f, "%r", regno);
23481 if ((lo_mask & ~1) != 0)
23482 fprintf (f, ", ");
23484 pushed_words++;
23488 if (mask & (1 << PC_REGNUM))
23490 /* Catch popping the PC. */
23491 if (TARGET_INTERWORK || TARGET_BACKTRACE
23492 || crtl->calls_eh_return)
23494 /* The PC is never popped directly; instead
23495 it is popped into r3 and then BX is used. */
23496 fprintf (f, "}\n");
23498 thumb_exit (f, -1);
23500 return;
23502 else
23504 if (mask & 0xFF)
23505 fprintf (f, ", ");
23507 asm_fprintf (f, "%r", PC_REGNUM);
23511 fprintf (f, "}\n");
23514 /* Generate code to return from a thumb function.
23515 If 'reg_containing_return_addr' is -1, then the return address is
23516 actually on the stack, at the stack pointer. */
23517 static void
23518 thumb_exit (FILE *f, int reg_containing_return_addr)
23520 unsigned regs_available_for_popping;
23521 unsigned regs_to_pop;
23522 int pops_needed;
23523 unsigned available;
23524 unsigned required;
23525 machine_mode mode;
23526 int size;
23527 int restore_a4 = FALSE;
23529 /* Compute the registers we need to pop. */
23530 regs_to_pop = 0;
23531 pops_needed = 0;
23533 if (reg_containing_return_addr == -1)
23535 regs_to_pop |= 1 << LR_REGNUM;
23536 ++pops_needed;
23539 if (TARGET_BACKTRACE)
23541 /* Restore the (ARM) frame pointer and stack pointer. */
23542 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23543 pops_needed += 2;
23546 /* If there is nothing to pop then just emit the BX instruction and
23547 return. */
23548 if (pops_needed == 0)
23550 if (crtl->calls_eh_return)
23551 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23553 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23554 return;
23556 /* Otherwise if we are not supporting interworking and we have not created
23557 a backtrace structure and the function was not entered in ARM mode then
23558 just pop the return address straight into the PC. */
23559 else if (!TARGET_INTERWORK
23560 && !TARGET_BACKTRACE
23561 && !is_called_in_ARM_mode (current_function_decl)
23562 && !crtl->calls_eh_return)
23564 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23565 return;
23568 /* Find out how many of the (return) argument registers we can corrupt. */
23569 regs_available_for_popping = 0;
23571 /* If returning via __builtin_eh_return, the bottom three registers
23572 all contain information needed for the return. */
23573 if (crtl->calls_eh_return)
23574 size = 12;
23575 else
23577 /* We can deduce the registers used from the function's
23578 return value. This is more reliable than examining
23579 df_regs_ever_live_p () because that will be set if the register is
23580 ever used in the function, not just if the register is used
23581 to hold a return value. */
23583 if (crtl->return_rtx != 0)
23584 mode = GET_MODE (crtl->return_rtx);
23585 else
23586 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23588 size = GET_MODE_SIZE (mode);
23590 if (size == 0)
23592 /* In a void function we can use any argument register.
23593 In a function that returns a structure on the stack
23594 we can use the second and third argument registers. */
23595 if (mode == VOIDmode)
23596 regs_available_for_popping =
23597 (1 << ARG_REGISTER (1))
23598 | (1 << ARG_REGISTER (2))
23599 | (1 << ARG_REGISTER (3));
23600 else
23601 regs_available_for_popping =
23602 (1 << ARG_REGISTER (2))
23603 | (1 << ARG_REGISTER (3));
23605 else if (size <= 4)
23606 regs_available_for_popping =
23607 (1 << ARG_REGISTER (2))
23608 | (1 << ARG_REGISTER (3));
23609 else if (size <= 8)
23610 regs_available_for_popping =
23611 (1 << ARG_REGISTER (3));
23614 /* Match registers to be popped with registers into which we pop them. */
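/* Each iteration strips the lowest set bit from both masks (X & -X
   isolates it), so POPS_NEEDED is decremented once for every register
   to pop that can be matched with an available popping register.  */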
23615 for (available = regs_available_for_popping,
23616 required = regs_to_pop;
23617 required != 0 && available != 0;
23618 available &= ~(available & - available),
23619 required &= ~(required & - required))
23620 -- pops_needed;
23622 /* If we have any popping registers left over, remove them. */
23623 if (available > 0)
23624 regs_available_for_popping &= ~available;
23626 /* Otherwise if we need another popping register we can use
23627 the fourth argument register. */
23628 else if (pops_needed)
23630 /* If we have not found any free argument registers and
23631 reg a4 contains the return address, we must move it. */
23632 if (regs_available_for_popping == 0
23633 && reg_containing_return_addr == LAST_ARG_REGNUM)
23635 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23636 reg_containing_return_addr = LR_REGNUM;
23638 else if (size > 12)
23640 /* Register a4 is being used to hold part of the return value,
23641 but we have dire need of a free, low register. */
23642 restore_a4 = TRUE;
23644 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23647 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23649 /* The fourth argument register is available. */
23650 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23652 --pops_needed;
23656 /* Pop as many registers as we can. */
23657 thumb_pop (f, regs_available_for_popping);
23659 /* Process the registers we popped. */
23660 if (reg_containing_return_addr == -1)
23662 /* The return address was popped into the lowest numbered register. */
23663 regs_to_pop &= ~(1 << LR_REGNUM);
23665 reg_containing_return_addr =
23666 number_of_first_bit_set (regs_available_for_popping);
23668 /* Remove this register from the mask of available registers, so that
23669 the return address will not be corrupted by further pops. */
23670 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23673 /* If we popped other registers then handle them here. */
23674 if (regs_available_for_popping)
23676 int frame_pointer;
23678 /* Work out which register currently contains the frame pointer. */
23679 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23681 /* Move it into the correct place. */
23682 asm_fprintf (f, "\tmov\t%r, %r\n",
23683 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23685 /* (Temporarily) remove it from the mask of popped registers. */
23686 regs_available_for_popping &= ~(1 << frame_pointer);
23687 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23689 if (regs_available_for_popping)
23691 int stack_pointer;
23693 /* We popped the stack pointer as well,
23694 find the register that contains it. */
23695 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23697 /* Move it into the stack register. */
23698 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23700 /* At this point we have popped all necessary registers, so
23701 do not worry about restoring regs_available_for_popping
23702 to its correct value:
23704 assert (pops_needed == 0)
23705 assert (regs_available_for_popping == (1 << frame_pointer))
23706 assert (regs_to_pop == (1 << STACK_POINTER)) */
23708 else
23710 /* Since we have just moved the popped value into the frame
23711 pointer, the popping register is available for reuse, and
23712 we know that we still have the stack pointer left to pop. */
23713 regs_available_for_popping |= (1 << frame_pointer);
23717 /* If we still have registers left on the stack, but we no longer have
23718 any registers into which we can pop them, then we must move the return
23719 address into the link register and make available the register that
23720 contained it. */
23721 if (regs_available_for_popping == 0 && pops_needed > 0)
23723 regs_available_for_popping |= 1 << reg_containing_return_addr;
23725 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23726 reg_containing_return_addr);
23728 reg_containing_return_addr = LR_REGNUM;
23731 /* If we have registers left on the stack then pop some more.
23732 We know that at most we will want to pop FP and SP. */
23733 if (pops_needed > 0)
23735 int popped_into;
23736 int move_to;
23738 thumb_pop (f, regs_available_for_popping);
23740 /* We have popped either FP or SP.
23741 Move whichever one it is into the correct register. */
23742 popped_into = number_of_first_bit_set (regs_available_for_popping);
23743 move_to = number_of_first_bit_set (regs_to_pop);
23745 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23747 regs_to_pop &= ~(1 << move_to);
23749 --pops_needed;
23752 /* If we still have not popped everything then we must have only
23753 had one register available to us and we are now popping the SP. */
23754 if (pops_needed > 0)
23756 int popped_into;
23758 thumb_pop (f, regs_available_for_popping);
23760 popped_into = number_of_first_bit_set (regs_available_for_popping);
23762 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23764 assert (regs_to_pop == (1 << STACK_POINTER))
23765 assert (pops_needed == 1)
23769 /* If necessary restore the a4 register. */
23770 if (restore_a4)
23772 if (reg_containing_return_addr != LR_REGNUM)
23774 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23775 reg_containing_return_addr = LR_REGNUM;
23778 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23781 if (crtl->calls_eh_return)
23782 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23784 /* Return to caller. */
23785 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23788 /* Scan INSN just before assembler is output for it.
23789 For Thumb-1, we track the status of the condition codes; this
23790 information is used in the cbranchsi4_insn pattern. */
23791 void
23792 thumb1_final_prescan_insn (rtx_insn *insn)
23794 if (flag_print_asm_name)
23795 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23796 INSN_ADDRESSES (INSN_UID (insn)));
23797 /* Don't overwrite the previous setter when we get to a cbranch. */
23798 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23800 enum attr_conds conds;
23802 if (cfun->machine->thumb1_cc_insn)
23804 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23805 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23806 CC_STATUS_INIT;
23808 conds = get_attr_conds (insn);
23809 if (conds == CONDS_SET)
23811 rtx set = single_set (insn);
23812 cfun->machine->thumb1_cc_insn = insn;
23813 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23814 cfun->machine->thumb1_cc_op1 = const0_rtx;
23815 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23816 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23818 rtx src1 = XEXP (SET_SRC (set), 1);
23819 if (src1 == const0_rtx)
23820 cfun->machine->thumb1_cc_mode = CCmode;
23822 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23824 /* Record the src register operand instead of dest because
23825 cprop_hardreg pass propagates src. */
23826 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23829 else if (conds != CONDS_NOCOND)
23830 cfun->machine->thumb1_cc_insn = NULL_RTX;
23833 /* Check whether an unexpected far jump is used. */
23834 if (cfun->machine->lr_save_eliminated
23835 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23836 internal_error ("Unexpected thumb1 far jump");
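/* Return 1 if VAL (truncated to 32 bits) is a non-zero 8-bit constant
   shifted left by 0 to 24 bits, e.g. 0x1fe00 (0xff << 9); return 0
   otherwise.  */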
23840 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23842 unsigned HOST_WIDE_INT mask = 0xff;
23843 int i;
23845 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23846 if (val == 0) /* XXX */
23847 return 0;
23849 for (i = 0; i < 25; i++)
23850 if ((val & (mask << i)) == val)
23851 return 1;
23853 return 0;
23856 /* Returns nonzero if the current function contains,
23857 or might contain, a far jump. */
23858 static int
23859 thumb_far_jump_used_p (void)
23861 rtx_insn *insn;
23862 bool far_jump = false;
23863 unsigned int func_size = 0;
23865 /* This test is only important for leaf functions. */
23866 /* assert (!leaf_function_p ()); */
23868 /* If we have already decided that far jumps may be used,
23869 do not bother checking again, and always return true even if
23870 it turns out that they are not being used. Once we have made
23871 the decision that far jumps are present (and that hence the link
23872 register will be pushed onto the stack) we cannot go back on it. */
23873 if (cfun->machine->far_jump_used)
23874 return 1;
23876 /* If this function is not being called from the prologue/epilogue
23877 generation code then it must be being called from the
23878 INITIAL_ELIMINATION_OFFSET macro. */
23879 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23881 /* In this case we know that we are being asked about the elimination
23882 of the arg pointer register. If that register is not being used,
23883 then there are no arguments on the stack, and we do not have to
23884 worry that a far jump might force the prologue to push the link
23885 register, changing the stack offsets. In this case we can just
23886 return false, since the presence of far jumps in the function will
23887 not affect stack offsets.
23889 If the arg pointer is live (or if it was live, but has now been
23890 eliminated and so set to dead) then we do have to test to see if
23891 the function might contain a far jump. This test can lead to some
23892 false negatives, since before reload is completed the length of
23893 branch instructions is not known, so GCC defaults to returning their
23894 longest length, which in turn sets the far jump attribute to true.
23896 A false negative will not result in bad code being generated, but it
23897 will result in a needless push and pop of the link register. We
23898 hope that this does not occur too often.
23900 If we need doubleword stack alignment this could affect the other
23901 elimination offsets so we can't risk getting it wrong. */
23902 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23903 cfun->machine->arg_pointer_live = 1;
23904 else if (!cfun->machine->arg_pointer_live)
23905 return 0;
23908 /* We should not change far_jump_used during or after reload, as there is
23909 no chance to change stack frame layout. */
23910 if (reload_in_progress || reload_completed)
23911 return 0;
23913 /* Check to see if the function contains a branch
23914 insn with the far jump attribute set. */
23915 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23917 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23919 far_jump = true;
23921 func_size += get_attr_length (insn);
23924 /* The far_jump attribute will always be true for thumb1 before the
23925 shorten_branch pass, so checking the far_jump attribute before
23926 shorten_branch isn't very useful.
23928 Following heuristic tries to estimate more accurately if a far jump
23929 may finally be used. The heuristic is very conservative as there is
23930 no chance to roll back the decision not to use a far jump.
23932 Thumb1 long branch offset is -2048 to 2046. The worst case is each
23933 2-byte insn is associated with a 4-byte constant pool entry. Using
23934 function size 2048/3 as the threshold is conservative enough. */
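/* Concretely: if every 2-byte insn drags in a 4-byte literal-pool entry,
   a function with FUNC_SIZE bytes of insns can occupy up to
   FUNC_SIZE + (FUNC_SIZE / 2) * 4 = FUNC_SIZE * 3 bytes of address space,
   so FUNC_SIZE * 3 >= 2048 means some branch may be out of short-branch
   range.  */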
23935 if (far_jump)
23937 if ((func_size * 3) >= 2048)
23939 /* Record the fact that we have decided that
23940 the function does use far jumps. */
23941 cfun->machine->far_jump_used = 1;
23942 return 1;
23946 return 0;
23949 /* Return nonzero if FUNC must be entered in ARM mode. */
23951 is_called_in_ARM_mode (tree func)
23953 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23955 /* Ignore the problem about functions whose address is taken. */
23956 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23957 return TRUE;
23959 #ifdef ARM_PE
23960 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23961 #else
23962 return FALSE;
23963 #endif
23966 /* Given the stack offsets and register mask in OFFSETS, decide how
23967 many additional registers to push instead of subtracting a constant
23968 from SP. For epilogues the principle is the same except we use pop.
23969 FOR_PROLOGUE indicates which we're generating. */
23970 static int
23971 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23973 HOST_WIDE_INT amount;
23974 unsigned long live_regs_mask = offsets->saved_regs_mask;
23975 /* Extract a mask of the ones we can give to the Thumb's push/pop
23976 instruction. */
23977 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23978 /* Then count how many other high registers will need to be pushed. */
23979 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23980 int n_free, reg_base, size;
23982 if (!for_prologue && frame_pointer_needed)
23983 amount = offsets->locals_base - offsets->saved_regs;
23984 else
23985 amount = offsets->outgoing_args - offsets->saved_regs;
23987 /* If the stack frame size is 512 exactly, we can save one load
23988 instruction, which should make this a win even when optimizing
23989 for speed. */
23990 if (!optimize_size && amount != 512)
23991 return 0;
23993 /* Can't do this if there are high registers to push. */
23994 if (high_regs_pushed != 0)
23995 return 0;
23997 /* Shouldn't do it in the prologue if no registers would normally
23998 be pushed at all. In the epilogue, also allow it if we'll have
23999 a pop insn for the PC. */
24000 if (l_mask == 0
24001 && (for_prologue
24002 || TARGET_BACKTRACE
24003 || (live_regs_mask & 1 << LR_REGNUM) == 0
24004 || TARGET_INTERWORK
24005 || crtl->args.pretend_args_size != 0))
24006 return 0;
24008 /* Don't do this if thumb_expand_prologue wants to emit instructions
24009 between the push and the stack frame allocation. */
24010 if (for_prologue
24011 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24012 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24013 return 0;
24015 reg_base = 0;
24016 n_free = 0;
24017 if (!for_prologue)
24019 size = arm_size_return_regs ();
24020 reg_base = ARM_NUM_INTS (size);
24021 live_regs_mask >>= reg_base;
24024 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24025 && (for_prologue || call_used_regs[reg_base + n_free]))
24027 live_regs_mask >>= 1;
24028 n_free++;
24031 if (n_free == 0)
24032 return 0;
24033 gcc_assert (amount / 4 * 4 == amount);
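/* A single Thumb-1 SP-adjusting insn can add or subtract at most 508
   (0x1fc), so when AMOUNT is just over that limit, pushing
   (AMOUNT - 508) / 4 extra registers brings the remaining adjustment
   back under the limit; e.g. AMOUNT == 512 with one free register
   returns 1.  */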
24035 if (amount >= 512 && (amount - n_free * 4) < 512)
24036 return (amount - 508) / 4;
24037 if (amount <= n_free * 4)
24038 return amount / 4;
24039 return 0;
24042 /* The bits which aren't usefully expanded as rtl. */
24043 const char *
24044 thumb1_unexpanded_epilogue (void)
24046 arm_stack_offsets *offsets;
24047 int regno;
24048 unsigned long live_regs_mask = 0;
24049 int high_regs_pushed = 0;
24050 int extra_pop;
24051 int had_to_push_lr;
24052 int size;
24054 if (cfun->machine->return_used_this_function != 0)
24055 return "";
24057 if (IS_NAKED (arm_current_func_type ()))
24058 return "";
24060 offsets = arm_get_frame_offsets ();
24061 live_regs_mask = offsets->saved_regs_mask;
24062 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24064 /* We can deduce the registers used from the function's return value.
24065 This is more reliable than examining df_regs_ever_live_p () because that
24066 will be set if the register is ever used in the function, not just if
24067 the register is used to hold a return value. */
24068 size = arm_size_return_regs ();
24070 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24071 if (extra_pop > 0)
24073 unsigned long extra_mask = (1 << extra_pop) - 1;
24074 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24077 /* The prologue may have pushed some high registers to use as
24078 work registers. e.g. the testsuite file:
24079 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24080 compiles to produce:
24081 push {r4, r5, r6, r7, lr}
24082 mov r7, r9
24083 mov r6, r8
24084 push {r6, r7}
24085 as part of the prologue. We have to undo that pushing here. */
24087 if (high_regs_pushed)
24089 unsigned long mask = live_regs_mask & 0xff;
24090 int next_hi_reg;
24092 /* The available low registers depend on the size of the value we are
24093 returning. */
24094 if (size <= 12)
24095 mask |= 1 << 3;
24096 if (size <= 8)
24097 mask |= 1 << 2;
24099 if (mask == 0)
24100 /* Oh dear! We have no low registers into which we can pop
24101 high registers! */
24102 internal_error
24103 ("no low registers available for popping high registers");
24105 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24106 if (live_regs_mask & (1 << next_hi_reg))
24107 break;
24109 while (high_regs_pushed)
24111 /* Find lo register(s) into which the high register(s) can
24112 be popped. */
24113 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24115 if (mask & (1 << regno))
24116 high_regs_pushed--;
24117 if (high_regs_pushed == 0)
24118 break;
24121 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24123 /* Pop the values into the low register(s). */
24124 thumb_pop (asm_out_file, mask);
24126 /* Move the value(s) into the high registers. */
24127 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24129 if (mask & (1 << regno))
24131 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24132 regno);
24134 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24135 if (live_regs_mask & (1 << next_hi_reg))
24136 break;
24140 live_regs_mask &= ~0x0f00;
24143 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24144 live_regs_mask &= 0xff;
24146 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24148 /* Pop the return address into the PC. */
24149 if (had_to_push_lr)
24150 live_regs_mask |= 1 << PC_REGNUM;
24152 /* Either no argument registers were pushed or a backtrace
24153 structure was created which includes an adjusted stack
24154 pointer, so just pop everything. */
24155 if (live_regs_mask)
24156 thumb_pop (asm_out_file, live_regs_mask);
24158 /* We have either just popped the return address into the
24159 PC or it was kept in LR for the entire function.
24160 Note that thumb_pop has already called thumb_exit if the
24161 PC was in the list. */
24162 if (!had_to_push_lr)
24163 thumb_exit (asm_out_file, LR_REGNUM);
24165 else
24167 /* Pop everything but the return address. */
24168 if (live_regs_mask)
24169 thumb_pop (asm_out_file, live_regs_mask);
24171 if (had_to_push_lr)
24173 if (size > 12)
24175 /* We have no free low regs, so save one. */
24176 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24177 LAST_ARG_REGNUM);
24180 /* Get the return address into a temporary register. */
24181 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24183 if (size > 12)
24185 /* Move the return address to lr. */
24186 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24187 LAST_ARG_REGNUM);
24188 /* Restore the low register. */
24189 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24190 IP_REGNUM);
24191 regno = LR_REGNUM;
24193 else
24194 regno = LAST_ARG_REGNUM;
24196 else
24197 regno = LR_REGNUM;
24199 /* Remove the argument registers that were pushed onto the stack. */
24200 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24201 SP_REGNUM, SP_REGNUM,
24202 crtl->args.pretend_args_size);
24204 thumb_exit (asm_out_file, regno);
24207 return "";
24210 /* Functions to save and restore machine-specific function data. */
24211 static struct machine_function *
24212 arm_init_machine_status (void)
24214 struct machine_function *machine;
24215 machine = ggc_cleared_alloc<machine_function> ();
24217 #if ARM_FT_UNKNOWN != 0
24218 machine->func_type = ARM_FT_UNKNOWN;
24219 #endif
24220 return machine;
24223 /* Return an RTX indicating where the return address to the
24224 calling function can be found. */
24226 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24228 if (count != 0)
24229 return NULL_RTX;
24231 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24234 /* Do anything needed before RTL is emitted for each function. */
24235 void
24236 arm_init_expanders (void)
24238 /* Arrange to initialize and mark the machine per-function status. */
24239 init_machine_status = arm_init_machine_status;
24241 /* This is to stop the combine pass optimizing away the alignment
24242 adjustment of va_arg. */
24243 /* ??? It is claimed that this should not be necessary. */
24244 if (cfun)
24245 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24249 /* Like arm_compute_initial_elimination_offset. Simpler because there
24250 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24251 to point at the base of the local variables after static stack
24252 space for a function has been allocated. */
24254 HOST_WIDE_INT
24255 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24257 arm_stack_offsets *offsets;
24259 offsets = arm_get_frame_offsets ();
24261 switch (from)
24263 case ARG_POINTER_REGNUM:
24264 switch (to)
24266 case STACK_POINTER_REGNUM:
24267 return offsets->outgoing_args - offsets->saved_args;
24269 case FRAME_POINTER_REGNUM:
24270 return offsets->soft_frame - offsets->saved_args;
24272 case ARM_HARD_FRAME_POINTER_REGNUM:
24273 return offsets->saved_regs - offsets->saved_args;
24275 case THUMB_HARD_FRAME_POINTER_REGNUM:
24276 return offsets->locals_base - offsets->saved_args;
24278 default:
24279 gcc_unreachable ();
24281 break;
24283 case FRAME_POINTER_REGNUM:
24284 switch (to)
24286 case STACK_POINTER_REGNUM:
24287 return offsets->outgoing_args - offsets->soft_frame;
24289 case ARM_HARD_FRAME_POINTER_REGNUM:
24290 return offsets->saved_regs - offsets->soft_frame;
24292 case THUMB_HARD_FRAME_POINTER_REGNUM:
24293 return offsets->locals_base - offsets->soft_frame;
24295 default:
24296 gcc_unreachable ();
24298 break;
24300 default:
24301 gcc_unreachable ();
24305 /* Generate the function's prologue. */
24307 void
24308 thumb1_expand_prologue (void)
24310 rtx_insn *insn;
24312 HOST_WIDE_INT amount;
24313 arm_stack_offsets *offsets;
24314 unsigned long func_type;
24315 int regno;
24316 unsigned long live_regs_mask;
24317 unsigned long l_mask;
24318 unsigned high_regs_pushed = 0;
24320 func_type = arm_current_func_type ();
24322 /* Naked functions don't have prologues. */
24323 if (IS_NAKED (func_type))
24324 return;
24326 if (IS_INTERRUPT (func_type))
24328 error ("interrupt Service Routines cannot be coded in Thumb mode");
24329 return;
24332 if (is_called_in_ARM_mode (current_function_decl))
24333 emit_insn (gen_prologue_thumb1_interwork ());
24335 offsets = arm_get_frame_offsets ();
24336 live_regs_mask = offsets->saved_regs_mask;
24338 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24339 l_mask = live_regs_mask & 0x40ff;
24340 /* Then count how many other high registers will need to be pushed. */
24341 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24343 if (crtl->args.pretend_args_size)
24345 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24347 if (cfun->machine->uses_anonymous_args)
24349 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24350 unsigned long mask;
24352 mask = 1ul << (LAST_ARG_REGNUM + 1);
24353 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
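/* E.g. pretend_args_size == 8 gives num_pushes == 2 and
   mask == 0xc, i.e. push {r2, r3}.  */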
24355 insn = thumb1_emit_multi_reg_push (mask, 0);
24357 else
24359 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24360 stack_pointer_rtx, x));
24362 RTX_FRAME_RELATED_P (insn) = 1;
24365 if (TARGET_BACKTRACE)
24367 HOST_WIDE_INT offset = 0;
24368 unsigned work_register;
24369 rtx work_reg, x, arm_hfp_rtx;
24371 /* We have been asked to create a stack backtrace structure.
24372 The code looks like this:
24374 0 .align 2
24375 0 func:
24376 0 sub SP, #16 Reserve space for 4 registers.
24377 2 push {R7} Push low registers.
24378 4 add R7, SP, #20 Get the stack pointer before the push.
24379 6 str R7, [SP, #8] Store the stack pointer
24380 (before reserving the space).
24381 8 mov R7, PC Get hold of the start of this code + 12.
24382 10 str R7, [SP, #16] Store it.
24383 12 mov R7, FP Get hold of the current frame pointer.
24384 14 str R7, [SP, #4] Store it.
24385 16 mov R7, LR Get hold of the current return address.
24386 18 str R7, [SP, #12] Store it.
24387 20 add R7, SP, #16 Point at the start of the
24388 backtrace structure.
24389 22 mov FP, R7 Put this value into the frame pointer. */
24391 work_register = thumb_find_work_register (live_regs_mask);
24392 work_reg = gen_rtx_REG (SImode, work_register);
24393 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24395 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24396 stack_pointer_rtx, GEN_INT (-16)));
24397 RTX_FRAME_RELATED_P (insn) = 1;
24399 if (l_mask)
24401 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24402 RTX_FRAME_RELATED_P (insn) = 1;
24404 offset = bit_count (l_mask) * UNITS_PER_WORD;
24407 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24408 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24410 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24411 x = gen_frame_mem (SImode, x);
24412 emit_move_insn (x, work_reg);
24414 /* Make sure that the instruction fetching the PC is in the right place
24415 to calculate "start of backtrace creation code + 12". */
24416 /* ??? The stores using the common WORK_REG ought to be enough to
24417 prevent the scheduler from doing anything weird. Failing that
24418 we could always move all of the following into an UNSPEC_VOLATILE. */
24419 if (l_mask)
24421 x = gen_rtx_REG (SImode, PC_REGNUM);
24422 emit_move_insn (work_reg, x);
24424 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24425 x = gen_frame_mem (SImode, x);
24426 emit_move_insn (x, work_reg);
24428 emit_move_insn (work_reg, arm_hfp_rtx);
24430 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24431 x = gen_frame_mem (SImode, x);
24432 emit_move_insn (x, work_reg);
24434 else
24436 emit_move_insn (work_reg, arm_hfp_rtx);
24438 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24439 x = gen_frame_mem (SImode, x);
24440 emit_move_insn (x, work_reg);
24442 x = gen_rtx_REG (SImode, PC_REGNUM);
24443 emit_move_insn (work_reg, x);
24445 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24446 x = gen_frame_mem (SImode, x);
24447 emit_move_insn (x, work_reg);
24450 x = gen_rtx_REG (SImode, LR_REGNUM);
24451 emit_move_insn (work_reg, x);
24453 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24454 x = gen_frame_mem (SImode, x);
24455 emit_move_insn (x, work_reg);
24457 x = GEN_INT (offset + 12);
24458 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24460 emit_move_insn (arm_hfp_rtx, work_reg);
24462 /* Optimization: If we are not pushing any low registers but we are going
24463 to push some high registers then delay our first push. This will just
24464 be a push of LR and we can combine it with the push of the first high
24465 register. */
24466 else if ((l_mask & 0xff) != 0
24467 || (high_regs_pushed == 0 && l_mask))
24469 unsigned long mask = l_mask;
24470 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24471 insn = thumb1_emit_multi_reg_push (mask, mask);
24472 RTX_FRAME_RELATED_P (insn) = 1;
24475 if (high_regs_pushed)
24477 unsigned pushable_regs;
24478 unsigned next_hi_reg;
24479 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24480 : crtl->args.info.nregs;
24481 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24483 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24484 if (live_regs_mask & (1 << next_hi_reg))
24485 break;
24487 /* Here we need to mask out registers used for passing arguments
24488 even if they can be pushed. This is to avoid using them to stash the high
24489 registers, since such a stash could clobber arguments that are still needed.
24490 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24492 if (pushable_regs == 0)
24493 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24495 while (high_regs_pushed > 0)
24497 unsigned long real_regs_mask = 0;
24499 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24501 if (pushable_regs & (1 << regno))
24503 emit_move_insn (gen_rtx_REG (SImode, regno),
24504 gen_rtx_REG (SImode, next_hi_reg));
24506 high_regs_pushed --;
24507 real_regs_mask |= (1 << next_hi_reg);
24509 if (high_regs_pushed)
24511 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24512 next_hi_reg --)
24513 if (live_regs_mask & (1 << next_hi_reg))
24514 break;
24516 else
24518 pushable_regs &= ~((1 << regno) - 1);
24519 break;
24524 /* If we had to find a work register and we have not yet
24525 saved the LR then add it to the list of regs to push. */
24526 if (l_mask == (1 << LR_REGNUM))
24528 pushable_regs |= l_mask;
24529 real_regs_mask |= l_mask;
24530 l_mask = 0;
24533 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24534 RTX_FRAME_RELATED_P (insn) = 1;
24538 /* Load the pic register before setting the frame pointer,
24539 so we can use r7 as a temporary work register. */
24540 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24541 arm_load_pic_register (live_regs_mask);
24543 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24544 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24545 stack_pointer_rtx);
24547 if (flag_stack_usage_info)
24548 current_function_static_stack_size
24549 = offsets->outgoing_args - offsets->saved_args;
24551 amount = offsets->outgoing_args - offsets->saved_regs;
24552 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24553 if (amount)
24555 if (amount < 512)
24557 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24558 GEN_INT (- amount)));
24559 RTX_FRAME_RELATED_P (insn) = 1;
24561 else
24563 rtx reg, dwarf;
24565 /* The stack decrement is too big for an immediate value in a single
24566 insn. In theory we could issue multiple subtracts, but after
24567 three of them it becomes more space efficient to place the full
24568 value in the constant pool and load into a register. (Also the
24569 ARM debugger really likes to see only one stack decrement per
24570 function). So instead we look for a scratch register into which
24571 we can load the decrement, and then we subtract this from the
24572 stack pointer. Unfortunately on the thumb the only available
24573 scratch registers are the argument registers, and we cannot use
24574 these as they may hold arguments to the function. Instead we
24575 attempt to locate a call preserved register which is used by this
24576 function. If we can find one, then we know that it will have
24577 been pushed at the start of the prologue and so we can corrupt
24578 it now. */
24579 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24580 if (live_regs_mask & (1 << regno))
24581 break;
24583 gcc_assert (regno <= LAST_LO_REGNUM);
24585 reg = gen_rtx_REG (SImode, regno);
24587 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24589 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24590 stack_pointer_rtx, reg));
24592 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24593 plus_constant (Pmode, stack_pointer_rtx,
24594 -amount));
24595 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24596 RTX_FRAME_RELATED_P (insn) = 1;
24600 if (frame_pointer_needed)
24601 thumb_set_frame_pointer (offsets);
24603 /* If we are profiling, make sure no instructions are scheduled before
24604 the call to mcount. Similarly if the user has requested no
24605 scheduling in the prolog. Similarly if we want non-call exceptions
24606 using the EABI unwinder, to prevent faulting instructions from being
24607 swapped with a stack adjustment. */
24608 if (crtl->profile || !TARGET_SCHED_PROLOG
24609 || (arm_except_unwind_info (&global_options) == UI_TARGET
24610 && cfun->can_throw_non_call_exceptions))
24611 emit_insn (gen_blockage ());
24613 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24614 if (live_regs_mask & 0xff)
24615 cfun->machine->lr_save_eliminated = 0;
24618 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
24619 POP instruction can be generated. LR should be replaced by PC. All
24620 the checks required are already done by USE_RETURN_INSN (). Hence,
24621 all we really need to decide here is whether a single register or
24622 multiple registers are to be popped. */
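/* In the single-register case the one saved register is popped straight
   into the PC, producing a lone "pop {pc}"; with more registers the LR
   bit in the mask is replaced by PC and a multi-register pop is emitted.  */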
24623 void
24624 thumb2_expand_return (bool simple_return)
24626 int i, num_regs;
24627 unsigned long saved_regs_mask;
24628 arm_stack_offsets *offsets;
24630 offsets = arm_get_frame_offsets ();
24631 saved_regs_mask = offsets->saved_regs_mask;
24633 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24634 if (saved_regs_mask & (1 << i))
24635 num_regs++;
24637 if (!simple_return && saved_regs_mask)
24639 if (num_regs == 1)
24641 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24642 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24643 rtx addr = gen_rtx_MEM (SImode,
24644 gen_rtx_POST_INC (SImode,
24645 stack_pointer_rtx));
24646 set_mem_alias_set (addr, get_frame_alias_set ());
24647 XVECEXP (par, 0, 0) = ret_rtx;
24648 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24649 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24650 emit_jump_insn (par);
24652 else
24654 saved_regs_mask &= ~ (1 << LR_REGNUM);
24655 saved_regs_mask |= (1 << PC_REGNUM);
24656 arm_emit_multi_reg_pop (saved_regs_mask);
24659 else
24661 emit_jump_insn (simple_return_rtx);
24665 void
24666 thumb1_expand_epilogue (void)
24668 HOST_WIDE_INT amount;
24669 arm_stack_offsets *offsets;
24670 int regno;
24672 /* Naked functions don't have epilogues. */
24673 if (IS_NAKED (arm_current_func_type ()))
24674 return;
24676 offsets = arm_get_frame_offsets ();
24677 amount = offsets->outgoing_args - offsets->saved_regs;
24679 if (frame_pointer_needed)
24681 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24682 amount = offsets->locals_base - offsets->saved_regs;
24684 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24686 gcc_assert (amount >= 0);
24687 if (amount)
24689 emit_insn (gen_blockage ());
24691 if (amount < 512)
24692 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24693 GEN_INT (amount)));
24694 else
24696 /* r3 is always free in the epilogue. */
24697 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24699 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24700 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24704 /* Emit a USE (stack_pointer_rtx), so that
24705 the stack adjustment will not be deleted. */
24706 emit_insn (gen_force_register_use (stack_pointer_rtx));
24708 if (crtl->profile || !TARGET_SCHED_PROLOG)
24709 emit_insn (gen_blockage ());
24711 /* Emit a clobber for each insn that will be restored in the epilogue,
24712 so that flow2 will get register lifetimes correct. */
24713 for (regno = 0; regno < 13; regno++)
24714 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24715 emit_clobber (gen_rtx_REG (SImode, regno));
24717 if (! df_regs_ever_live_p (LR_REGNUM))
24718 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24721 /* Epilogue code for APCS frame. */
24722 static void
24723 arm_expand_epilogue_apcs_frame (bool really_return)
24725 unsigned long func_type;
24726 unsigned long saved_regs_mask;
24727 int num_regs = 0;
24728 int i;
24729 int floats_from_frame = 0;
24730 arm_stack_offsets *offsets;
24732 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24733 func_type = arm_current_func_type ();
24735 /* Get frame offsets for ARM. */
24736 offsets = arm_get_frame_offsets ();
24737 saved_regs_mask = offsets->saved_regs_mask;
24739 /* Find the offset of the floating-point save area in the frame. */
24740 floats_from_frame
24741 = (offsets->saved_args
24742 + arm_compute_static_chain_stack_bytes ()
24743 - offsets->frame);
24745 /* Compute how many core registers are saved and how far away the floats are. */
24746 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24747 if (saved_regs_mask & (1 << i))
24749 num_regs++;
24750 floats_from_frame += 4;
24753 if (TARGET_HARD_FLOAT && TARGET_VFP)
24755 int start_reg;
24756 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24758 /* The offset is from IP_REGNUM. */
24759 int saved_size = arm_get_vfp_saved_size ();
24760 if (saved_size > 0)
24762 rtx_insn *insn;
24763 floats_from_frame += saved_size;
24764 insn = emit_insn (gen_addsi3 (ip_rtx,
24765 hard_frame_pointer_rtx,
24766 GEN_INT (-floats_from_frame)));
24767 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24768 ip_rtx, hard_frame_pointer_rtx);
24771 /* Generate VFP register multi-pop. */
24772 start_reg = FIRST_VFP_REGNUM;
24774 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24775 /* Look for a case where a reg does not need restoring. */
24776 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24777 && (!df_regs_ever_live_p (i + 1)
24778 || call_used_regs[i + 1]))
24780 if (start_reg != i)
24781 arm_emit_vfp_multi_reg_pop (start_reg,
24782 (i - start_reg) / 2,
24783 gen_rtx_REG (SImode,
24784 IP_REGNUM));
24785 start_reg = i + 2;
24788 /* Restore the remaining regs that we have discovered (or possibly
24789 even all of them, if the conditional in the for loop never
24790 fired). */
24791 if (start_reg != i)
24792 arm_emit_vfp_multi_reg_pop (start_reg,
24793 (i - start_reg) / 2,
24794 gen_rtx_REG (SImode, IP_REGNUM));
24797 if (TARGET_IWMMXT)
24799 /* The frame pointer is guaranteed to be non-double-word aligned, as
24800 it is set to double-word-aligned old_stack_pointer - 4. */
24801 rtx_insn *insn;
24802 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24804 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24805 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24807 rtx addr = gen_frame_mem (V2SImode,
24808 plus_constant (Pmode, hard_frame_pointer_rtx,
24809 - lrm_count * 4));
24810 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24811 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24812 gen_rtx_REG (V2SImode, i),
24813 NULL_RTX);
24814 lrm_count += 2;
24818 /* saved_regs_mask should contain IP, which holds the old stack pointer
24819 from the time the activation record was created. Since SP and IP are adjacent registers,
24820 we can restore the value directly into SP. */
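/* Because no register lies between IP (r12) and SP (r13), replacing IP
   with SP in the mask keeps the same slot ordering, so the multi-register
   pop reloads the saved old stack pointer (stored from IP by the
   prologue) directly into SP.  */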
24821 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24822 saved_regs_mask &= ~(1 << IP_REGNUM);
24823 saved_regs_mask |= (1 << SP_REGNUM);
24825 /* There are two registers left in saved_regs_mask - LR and PC. We
24826 only need to restore LR (the return address), but to
24827 save time we can load it directly into PC, unless we need a
24828 special function exit sequence, or we are not really returning. */
24829 if (really_return
24830 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24831 && !crtl->calls_eh_return)
24832 /* Delete LR from the register mask, so that LR on
24833 the stack is loaded into the PC in the register mask. */
24834 saved_regs_mask &= ~(1 << LR_REGNUM);
24835 else
24836 saved_regs_mask &= ~(1 << PC_REGNUM);
24838 num_regs = bit_count (saved_regs_mask);
24839 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24841 rtx_insn *insn;
24842 emit_insn (gen_blockage ());
24843 /* Unwind the stack to just below the saved registers. */
24844 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24845 hard_frame_pointer_rtx,
24846 GEN_INT (- 4 * num_regs)));
24848 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24849 stack_pointer_rtx, hard_frame_pointer_rtx);
24852 arm_emit_multi_reg_pop (saved_regs_mask);
24854 if (IS_INTERRUPT (func_type))
24856 /* Interrupt handlers will have pushed the
24857 IP onto the stack, so restore it now. */
24858 rtx_insn *insn;
24859 rtx addr = gen_rtx_MEM (SImode,
24860 gen_rtx_POST_INC (SImode,
24861 stack_pointer_rtx));
24862 set_mem_alias_set (addr, get_frame_alias_set ());
24863 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24864 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24865 gen_rtx_REG (SImode, IP_REGNUM),
24866 NULL_RTX);
24869 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24870 return;
24872 if (crtl->calls_eh_return)
24873 emit_insn (gen_addsi3 (stack_pointer_rtx,
24874 stack_pointer_rtx,
24875 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24877 if (IS_STACKALIGN (func_type))
24878 /* Restore the original stack pointer. Before prologue, the stack was
24879 realigned and the original stack pointer saved in r0. For details,
24880 see comment in arm_expand_prologue. */
24881 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24883 emit_jump_insn (simple_return_rtx);
24886 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24887 function is not a sibcall. */
24888 void
24889 arm_expand_epilogue (bool really_return)
24891 unsigned long func_type;
24892 unsigned long saved_regs_mask;
24893 int num_regs = 0;
24894 int i;
24895 int amount;
24896 arm_stack_offsets *offsets;
24898 func_type = arm_current_func_type ();
24900 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24901 let output_return_instruction take care of instruction emission if any. */
24902 if (IS_NAKED (func_type)
24903 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24905 if (really_return)
24906 emit_jump_insn (simple_return_rtx);
24907 return;
24910 /* If we are throwing an exception, then we really must be doing a
24911 return, so we can't tail-call. */
24912 gcc_assert (!crtl->calls_eh_return || really_return);
24914 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24916 arm_expand_epilogue_apcs_frame (really_return);
24917 return;
24920 /* Get frame offsets for ARM. */
24921 offsets = arm_get_frame_offsets ();
24922 saved_regs_mask = offsets->saved_regs_mask;
24923 num_regs = bit_count (saved_regs_mask);
24925 if (frame_pointer_needed)
24927 rtx_insn *insn;
24928 /* Restore stack pointer if necessary. */
24929 if (TARGET_ARM)
24931 /* In ARM mode, frame pointer points to first saved register.
24932 Restore stack pointer to last saved register. */
24933 amount = offsets->frame - offsets->saved_regs;
24935 /* Force out any pending memory operations that reference stacked data
24936 before stack de-allocation occurs. */
24937 emit_insn (gen_blockage ());
24938 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24939 hard_frame_pointer_rtx,
24940 GEN_INT (amount)));
24941 arm_add_cfa_adjust_cfa_note (insn, amount,
24942 stack_pointer_rtx,
24943 hard_frame_pointer_rtx);
24945 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24946 deleted. */
24947 emit_insn (gen_force_register_use (stack_pointer_rtx));
24949 else
24951 /* In Thumb-2 mode, the frame pointer points to the last saved
24952 register. */
24953 amount = offsets->locals_base - offsets->saved_regs;
24954 if (amount)
24956 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24957 hard_frame_pointer_rtx,
24958 GEN_INT (amount)));
24959 arm_add_cfa_adjust_cfa_note (insn, amount,
24960 hard_frame_pointer_rtx,
24961 hard_frame_pointer_rtx);
24964 /* Force out any pending memory operations that reference stacked data
24965 before stack de-allocation occurs. */
24966 emit_insn (gen_blockage ());
24967 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24968 hard_frame_pointer_rtx));
24969 arm_add_cfa_adjust_cfa_note (insn, 0,
24970 stack_pointer_rtx,
24971 hard_frame_pointer_rtx);
24972 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24973 deleted. */
24974 emit_insn (gen_force_register_use (stack_pointer_rtx));
24977 else
24979 /* Pop off outgoing args and local frame to adjust stack pointer to
24980 last saved register. */
24981 amount = offsets->outgoing_args - offsets->saved_regs;
24982 if (amount)
24984 rtx_insn *tmp;
24985 /* Force out any pending memory operations that reference stacked data
24986 before stack de-allocation occurs. */
24987 emit_insn (gen_blockage ());
24988 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24989 stack_pointer_rtx,
24990 GEN_INT (amount)));
24991 arm_add_cfa_adjust_cfa_note (tmp, amount,
24992 stack_pointer_rtx, stack_pointer_rtx);
24993 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24994 not deleted. */
24995 emit_insn (gen_force_register_use (stack_pointer_rtx));
24999 if (TARGET_HARD_FLOAT && TARGET_VFP)
25001 /* Generate VFP register multi-pop. */
25002 int end_reg = LAST_VFP_REGNUM + 1;
25004 /* Scan the registers in reverse order. We need to match
25005 any groupings made in the prologue and generate matching
25006 vldm operations. The need to match groups is because,
25007 unlike pop, vldm can only do consecutive regs. */
25008 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25009 /* Look for a case where a reg does not need restoring. */
25010 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25011 && (!df_regs_ever_live_p (i + 1)
25012 || call_used_regs[i + 1]))
25014 /* Restore the regs discovered so far (from reg+2 to
25015 end_reg). */
25016 if (end_reg > i + 2)
25017 arm_emit_vfp_multi_reg_pop (i + 2,
25018 (end_reg - (i + 2)) / 2,
25019 stack_pointer_rtx);
25020 end_reg = i;
25023 /* Restore the remaining regs that we have discovered (or possibly
25024 even all of them, if the conditional in the for loop never
25025 fired). */
25026 if (end_reg > i + 2)
25027 arm_emit_vfp_multi_reg_pop (i + 2,
25028 (end_reg - (i + 2)) / 2,
25029 stack_pointer_rtx);
25032 if (TARGET_IWMMXT)
25033 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25034 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25036 rtx_insn *insn;
25037 rtx addr = gen_rtx_MEM (V2SImode,
25038 gen_rtx_POST_INC (SImode,
25039 stack_pointer_rtx));
25040 set_mem_alias_set (addr, get_frame_alias_set ());
25041 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25042 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25043 gen_rtx_REG (V2SImode, i),
25044 NULL_RTX);
25045 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25046 stack_pointer_rtx, stack_pointer_rtx);
25049 if (saved_regs_mask)
25051 rtx insn;
25052 bool return_in_pc = false;
25054 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25055 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25056 && !IS_STACKALIGN (func_type)
25057 && really_return
25058 && crtl->args.pretend_args_size == 0
25059 && saved_regs_mask & (1 << LR_REGNUM)
25060 && !crtl->calls_eh_return)
25062 saved_regs_mask &= ~(1 << LR_REGNUM);
25063 saved_regs_mask |= (1 << PC_REGNUM);
25064 return_in_pc = true;
25067 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25069 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25070 if (saved_regs_mask & (1 << i))
25072 rtx addr = gen_rtx_MEM (SImode,
25073 gen_rtx_POST_INC (SImode,
25074 stack_pointer_rtx));
25075 set_mem_alias_set (addr, get_frame_alias_set ());
25077 if (i == PC_REGNUM)
25079 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25080 XVECEXP (insn, 0, 0) = ret_rtx;
25081 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25082 gen_rtx_REG (SImode, i),
25083 addr);
25084 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25085 insn = emit_jump_insn (insn);
25087 else
25089 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25090 addr));
25091 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25092 gen_rtx_REG (SImode, i),
25093 NULL_RTX);
25094 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25095 stack_pointer_rtx,
25096 stack_pointer_rtx);
25100 else
25102 if (TARGET_LDRD
25103 && current_tune->prefer_ldrd_strd
25104 && !optimize_function_for_size_p (cfun))
25106 if (TARGET_THUMB2)
25107 thumb2_emit_ldrd_pop (saved_regs_mask);
25108 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25109 arm_emit_ldrd_pop (saved_regs_mask);
25110 else
25111 arm_emit_multi_reg_pop (saved_regs_mask);
25113 else
25114 arm_emit_multi_reg_pop (saved_regs_mask);
25117 if (return_in_pc == true)
25118 return;
25121 if (crtl->args.pretend_args_size)
25123 int i, j;
25124 rtx dwarf = NULL_RTX;
25125 rtx_insn *tmp =
25126 emit_insn (gen_addsi3 (stack_pointer_rtx,
25127 stack_pointer_rtx,
25128 GEN_INT (crtl->args.pretend_args_size)));
25130 RTX_FRAME_RELATED_P (tmp) = 1;
25132 if (cfun->machine->uses_anonymous_args)
25134 /* Restore pretend args. Refer to arm_expand_prologue for how
25135 pretend_args are saved on the stack. */
25136 int num_regs = crtl->args.pretend_args_size / 4;
25137 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
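/* E.g. pretend_args_size == 8 gives num_regs == 2 and a mask of 0xc
   (r2 and r3), matching the registers pushed by the prologue.  */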
25138 for (j = 0, i = 0; j < num_regs; i++)
25139 if (saved_regs_mask & (1 << i))
25141 rtx reg = gen_rtx_REG (SImode, i);
25142 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25143 j++;
25145 REG_NOTES (tmp) = dwarf;
25147 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25148 stack_pointer_rtx, stack_pointer_rtx);
25151 if (!really_return)
25152 return;
25154 if (crtl->calls_eh_return)
25155 emit_insn (gen_addsi3 (stack_pointer_rtx,
25156 stack_pointer_rtx,
25157 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25159 if (IS_STACKALIGN (func_type))
25160 /* Restore the original stack pointer. Before prologue, the stack was
25161 realigned and the original stack pointer saved in r0. For details,
25162 see comment in arm_expand_prologue. */
25163 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25165 emit_jump_insn (simple_return_rtx);
25168 /* Implementation of insn prologue_thumb1_interwork. This is the first
25169 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25171 const char *
25172 thumb1_output_interwork (void)
25174 const char * name;
25175 FILE *f = asm_out_file;
25177 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25178 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25179 == SYMBOL_REF);
25180 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25182 /* Generate code sequence to switch us into Thumb mode. */
25183 /* The .code 32 directive has already been emitted by
25184 ASM_DECLARE_FUNCTION_NAME. */
25185 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25186 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25188 /* Generate a label, so that the debugger will notice the
25189 change in instruction sets. This label is also used by
25190 the assembler to bypass the ARM code when this function
25191 is called from a Thumb encoded function elsewhere in the
25192 same file. Hence the definition of STUB_NAME here must
25193 agree with the definition in gas/config/tc-arm.c. */
25195 #define STUB_NAME ".real_start_of"
25197 fprintf (f, "\t.code\t16\n");
25198 #ifdef ARM_PE
25199 if (arm_dllexport_name_p (name))
25200 name = arm_strip_name_encoding (name);
25201 #endif
25202 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25203 fprintf (f, "\t.thumb_func\n");
25204 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25206 return "";
25209 /* Handle the case of a double word load into a low register from
25210 a computed memory address. The computed address may involve a
25211 register which is overwritten by the load. */
25212 const char *
25213 thumb_load_double_from_address (rtx *operands)
25215 rtx addr;
25216 rtx base;
25217 rtx offset;
25218 rtx arg1;
25219 rtx arg2;
25221 gcc_assert (REG_P (operands[0]));
25222 gcc_assert (MEM_P (operands[1]));
25224 /* Get the memory address. */
25225 addr = XEXP (operands[1], 0);
25227 /* Work out how the memory address is computed. */
25228 switch (GET_CODE (addr))
25230 case REG:
25231 operands[2] = adjust_address (operands[1], SImode, 4);
25233 if (REGNO (operands[0]) == REGNO (addr))
25235 output_asm_insn ("ldr\t%H0, %2", operands);
25236 output_asm_insn ("ldr\t%0, %1", operands);
25238 else
25240 output_asm_insn ("ldr\t%0, %1", operands);
25241 output_asm_insn ("ldr\t%H0, %2", operands);
25243 break;
25245 case CONST:
25246 /* Compute <address> + 4 for the high order load. */
25247 operands[2] = adjust_address (operands[1], SImode, 4);
25249 output_asm_insn ("ldr\t%0, %1", operands);
25250 output_asm_insn ("ldr\t%H0, %2", operands);
25251 break;
25253 case PLUS:
25254 arg1 = XEXP (addr, 0);
25255 arg2 = XEXP (addr, 1);
25257 if (CONSTANT_P (arg1))
25258 base = arg2, offset = arg1;
25259 else
25260 base = arg1, offset = arg2;
25262 gcc_assert (REG_P (base));
25264 /* Catch the case of <address> = <reg> + <reg> */
25265 if (REG_P (offset))
25267 int reg_offset = REGNO (offset);
25268 int reg_base = REGNO (base);
25269 int reg_dest = REGNO (operands[0]);
25271 /* Add the base and offset registers together into the
25272 higher destination register. */
25273 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25274 reg_dest + 1, reg_base, reg_offset);
25276 /* Load the lower destination register from the address in
25277 the higher destination register. */
25278 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25279 reg_dest, reg_dest + 1);
25281 /* Load the higher destination register from its own address
25282 plus 4. */
25283 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25284 reg_dest + 1, reg_dest + 1);
25286 else
25288 /* Compute <address> + 4 for the high order load. */
25289 operands[2] = adjust_address (operands[1], SImode, 4);
25291 /* If the computed address is held in the low order register
25292 then load the high order register first, otherwise always
25293 load the low order register first. */
25294 if (REGNO (operands[0]) == REGNO (base))
25296 output_asm_insn ("ldr\t%H0, %2", operands);
25297 output_asm_insn ("ldr\t%0, %1", operands);
25299 else
25301 output_asm_insn ("ldr\t%0, %1", operands);
25302 output_asm_insn ("ldr\t%H0, %2", operands);
25305 break;
25307 case LABEL_REF:
25308 /* With no registers to worry about we can just load the value
25309 directly. */
25310 operands[2] = adjust_address (operands[1], SImode, 4);
25312 output_asm_insn ("ldr\t%H0, %2", operands);
25313 output_asm_insn ("ldr\t%0, %1", operands);
25314 break;
25316 default:
25317 gcc_unreachable ();
25320 return "";
25323 const char *
25324 thumb_output_move_mem_multiple (int n, rtx *operands)
25326 rtx tmp;
25328 switch (n)
25330 case 2:
25331 if (REGNO (operands[4]) > REGNO (operands[5]))
25333 tmp = operands[4];
25334 operands[4] = operands[5];
25335 operands[5] = tmp;
25337 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25338 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25339 break;
25341 case 3:
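/* The three conditional exchanges below sort operands[4], operands[5] and
   operands[6] into ascending register-number order, so that the ldmia/stmia
   register lists that follow are in the required order.  */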
25342 if (REGNO (operands[4]) > REGNO (operands[5]))
25343 std::swap (operands[4], operands[5]);
25344 if (REGNO (operands[5]) > REGNO (operands[6]))
25345 std::swap (operands[5], operands[6]);
25346 if (REGNO (operands[4]) > REGNO (operands[5]))
25347 std::swap (operands[4], operands[5]);
25349 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25350 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25351 break;
25353 default:
25354 gcc_unreachable ();
25357 return "";
25360 /* Output a call-via instruction for thumb state. */
25361 const char *
25362 thumb_call_via_reg (rtx reg)
25364 int regno = REGNO (reg);
25365 rtx *labelp;
25367 gcc_assert (regno < LR_REGNUM);
25369 /* If we are in the normal text section we can use a single instance
25370 per compilation unit. If we are doing function sections, then we need
25371 an entry per section, since we can't rely on reachability. */
25372 if (in_section == text_section)
25374 thumb_call_reg_needed = 1;
25376 if (thumb_call_via_label[regno] == NULL)
25377 thumb_call_via_label[regno] = gen_label_rtx ();
25378 labelp = thumb_call_via_label + regno;
25380 else
25382 if (cfun->machine->call_via[regno] == NULL)
25383 cfun->machine->call_via[regno] = gen_label_rtx ();
25384 labelp = cfun->machine->call_via + regno;
25387 output_asm_insn ("bl\t%a0", labelp);
25388 return "";
25391 /* Routines for generating rtl. */
25392 void
25393 thumb_expand_movmemqi (rtx *operands)
25395 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25396 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25397 HOST_WIDE_INT len = INTVAL (operands[2]);
25398 HOST_WIDE_INT offset = 0;
25400 while (len >= 12)
25402 emit_insn (gen_movmem12b (out, in, out, in));
25403 len -= 12;
25406 if (len >= 8)
25408 emit_insn (gen_movmem8b (out, in, out, in));
25409 len -= 8;
25412 if (len >= 4)
25414 rtx reg = gen_reg_rtx (SImode);
25415 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25416 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25417 len -= 4;
25418 offset += 4;
25421 if (len >= 2)
25423 rtx reg = gen_reg_rtx (HImode);
25424 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25425 plus_constant (Pmode, in,
25426 offset))));
25427 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25428 offset)),
25429 reg));
25430 len -= 2;
25431 offset += 2;
25434 if (len)
25436 rtx reg = gen_reg_rtx (QImode);
25437 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25438 plus_constant (Pmode, in,
25439 offset))));
25440 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25441 offset)),
25442 reg));
25446 void
25447 thumb_reload_out_hi (rtx *operands)
25449 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25452 /* Handle reading a half-word from memory during reload. */
25453 void
25454 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25456 gcc_unreachable ();
25459 /* Return the length of a function name prefix
25460 that starts with the character 'c'. */
25461 static int
25462 arm_get_strip_length (int c)
25464 switch (c)
25466 ARM_NAME_ENCODING_LENGTHS
25467 default: return 0;
25471 /* Return a pointer to a function's name with any
25472 and all prefix encodings stripped from it. */
25473 const char *
25474 arm_strip_name_encoding (const char *name)
25476 int skip;
25478 while ((skip = arm_get_strip_length (* name)))
25479 name += skip;
25481 return name;
25484 /* If there is a '*' anywhere in the name's prefix, then
25485 emit the stripped name verbatim, otherwise prepend an
25486 underscore if leading underscores are being used. */
25487 void
25488 arm_asm_output_labelref (FILE *stream, const char *name)
25490 int skip;
25491 int verbatim = 0;
25493 while ((skip = arm_get_strip_length (* name)))
25495 verbatim |= (*name == '*');
25496 name += skip;
25499 if (verbatim)
25500 fputs (name, stream);
25501 else
25502 asm_fprintf (stream, "%U%s", name);
25505 /* This function is used to emit an EABI tag and its associated value.
25506 We emit the numerical value of the tag in case the assembler does not
25507 support textual tags. (E.g. gas prior to 2.20). If requested we include
25508 the tag name in a comment so that anyone reading the assembler output
25509 will know which tag is being set.
25511 This function is not static because arm-c.c needs it too. */
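/* For example (illustrative): with -fverbose-asm, a call such as
   arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) produces a line of
   the form ".eabi_attribute 20, 1  @ Tag_ABI_FP_denormal", where '@' is the
   usual ASM_COMMENT_START on ARM ELF targets.  */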
25513 void
25514 arm_emit_eabi_attribute (const char *name, int num, int val)
25516 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25517 if (flag_verbose_asm || flag_debug_asm)
25518 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25519 asm_fprintf (asm_out_file, "\n");
25522 static void
25523 arm_file_start (void)
25525 int val;
25527 if (TARGET_UNIFIED_ASM)
25528 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25530 if (TARGET_BPABI)
25532 const char *fpu_name;
25533 if (arm_selected_arch)
25535 /* armv7ve doesn't support any extensions. */
25536 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25538 /* Keep backward compatibility for assemblers
25539 which don't support armv7ve. */
25540 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25541 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25542 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25543 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25544 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25546 else
25548 const char* pos = strchr (arm_selected_arch->name, '+');
25549 if (pos)
25551 char buf[15];
25552 gcc_assert (strlen (arm_selected_arch->name)
25553 <= sizeof (buf) / sizeof (*pos));
25554 strncpy (buf, arm_selected_arch->name,
25555 (pos - arm_selected_arch->name) * sizeof (*pos));
25556 buf[pos - arm_selected_arch->name] = '\0';
25557 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25558 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25560 else
25561 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25564 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25565 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25566 else
25568 const char* truncated_name
25569 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25570 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25573 if (TARGET_SOFT_FLOAT)
25575 fpu_name = "softvfp";
25577 else
25579 fpu_name = arm_fpu_desc->name;
25580 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25582 if (TARGET_HARD_FLOAT)
25583 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25584 if (TARGET_HARD_FLOAT_ABI)
25585 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25588 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25590 /* Some of these attributes only apply when the corresponding features
25591 are used. However we don't have any easy way of figuring this out.
25592 Conservatively record the setting that would have been used. */
25594 if (flag_rounding_math)
25595 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25597 if (!flag_unsafe_math_optimizations)
25599 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25600 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25602 if (flag_signaling_nans)
25603 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25606 flag_finite_math_only ? 1 : 3);
25608 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25609 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25610 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25611 flag_short_enums ? 1 : 2);
25613 /* Tag_ABI_optimization_goals. */
25614 if (optimize_size)
25615 val = 4;
25616 else if (optimize >= 2)
25617 val = 2;
25618 else if (optimize)
25619 val = 1;
25620 else
25621 val = 6;
25622 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25624 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25625 unaligned_access);
25627 if (arm_fp16_format)
25628 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25629 (int) arm_fp16_format);
25631 if (arm_lang_output_object_attributes_hook)
25632 arm_lang_output_object_attributes_hook();
25635 default_file_start ();
25638 static void
25639 arm_file_end (void)
25641 int regno;
25643 if (NEED_INDICATE_EXEC_STACK)
25644 /* Add .note.GNU-stack. */
25645 file_end_indicate_exec_stack ();
25647 if (! thumb_call_reg_needed)
25648 return;
25650 switch_to_section (text_section);
25651 asm_fprintf (asm_out_file, "\t.code 16\n");
25652 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25654 for (regno = 0; regno < LR_REGNUM; regno++)
25656 rtx label = thumb_call_via_label[regno];
25658 if (label != 0)
25660 targetm.asm_out.internal_label (asm_out_file, "L",
25661 CODE_LABEL_NUMBER (label));
25662 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25667 #ifndef ARM_PE
25668 /* Symbols in the text segment can be accessed without indirecting via the
25669 constant pool; it may take an extra binary operation, but this is still
25670 faster than indirecting via memory. Don't do this when not optimizing,
25671 since we won't be calculating all of the offsets necessary to do this
25672 simplification. */
25674 static void
25675 arm_encode_section_info (tree decl, rtx rtl, int first)
25677 if (optimize > 0 && TREE_CONSTANT (decl))
25678 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25680 default_encode_section_info (decl, rtl, first);
25682 #endif /* !ARM_PE */
25684 static void
25685 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25687 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25688 && !strcmp (prefix, "L"))
25690 arm_ccfsm_state = 0;
25691 arm_target_insn = NULL;
25693 default_internal_label (stream, prefix, labelno);
25696 /* Output code to add DELTA to the first argument, and then jump
25697 to FUNCTION. Used for C++ multiple inheritance. */
25698 static void
25699 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25700 HOST_WIDE_INT delta,
25701 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25702 tree function)
25704 static int thunk_label = 0;
25705 char label[256];
25706 char labelpc[256];
25707 int mi_delta = delta;
25708 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25709 int shift = 0;
25710 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25711 ? 1 : 0);
25712 if (mi_delta < 0)
25713 mi_delta = - mi_delta;
25715 final_start_function (emit_barrier (), file, 1);
25717 if (TARGET_THUMB1)
25719 int labelno = thunk_label++;
25720 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25721 /* Thunks are entered in ARM mode when available. */
25722 if (TARGET_THUMB1_ONLY)
25724 /* push r3 so we can use it as a temporary. */
25725 /* TODO: Omit this save if r3 is not used. */
25726 fputs ("\tpush {r3}\n", file);
25727 fputs ("\tldr\tr3, ", file);
25729 else
25731 fputs ("\tldr\tr12, ", file);
25733 assemble_name (file, label);
25734 fputc ('\n', file);
25735 if (flag_pic)
25737 /* If we are generating PIC, the ldr instruction below loads
25738 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25739 the address of the add + 8, so we have:
25741 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25742 = target + 1.
25744 Note that we have "+ 1" because some versions of GNU ld
25745 don't set the low bit of the result for R_ARM_REL32
25746 relocations against thumb function symbols.
25747 On ARMv6M this is +4, not +8. */
25748 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25749 assemble_name (file, labelpc);
25750 fputs (":\n", file);
25751 if (TARGET_THUMB1_ONLY)
25753 /* This is 2 insns after the start of the thunk, so we know it
25754 is 4-byte aligned. */
25755 fputs ("\tadd\tr3, pc, r3\n", file);
25756 fputs ("\tmov r12, r3\n", file);
25758 else
25759 fputs ("\tadd\tr12, pc, r12\n", file);
25761 else if (TARGET_THUMB1_ONLY)
25762 fputs ("\tmov r12, r3\n", file);
25764 if (TARGET_THUMB1_ONLY)
25766 if (mi_delta > 255)
25768 fputs ("\tldr\tr3, ", file);
25769 assemble_name (file, label);
25770 fputs ("+4\n", file);
25771 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25772 mi_op, this_regno, this_regno);
25774 else if (mi_delta != 0)
25776 /* Thumb1 unified syntax requires s suffix in instruction name when
25777 one of the operands is immediate. */
25778 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25779 mi_op, this_regno, this_regno,
25780 mi_delta);
25783 else
25785 /* TODO: Use movw/movt for large constants when available. */
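/* As an illustration, a positive delta of 0x1234 is split by the loop below
   into "add <this>, <this>, #0x234" followed by "add <this>, <this>, #0x1000";
   each piece is an 8-bit value at an even rotation, so it is a legal ARM
   immediate.  */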
25786 while (mi_delta != 0)
25788 if ((mi_delta & (3 << shift)) == 0)
25789 shift += 2;
25790 else
25792 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25793 mi_op, this_regno, this_regno,
25794 mi_delta & (0xff << shift));
25795 mi_delta &= ~(0xff << shift);
25796 shift += 8;
25800 if (TARGET_THUMB1)
25802 if (TARGET_THUMB1_ONLY)
25803 fputs ("\tpop\t{r3}\n", file);
25805 fprintf (file, "\tbx\tr12\n");
25806 ASM_OUTPUT_ALIGN (file, 2);
25807 assemble_name (file, label);
25808 fputs (":\n", file);
25809 if (flag_pic)
25811 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25812 rtx tem = XEXP (DECL_RTL (function), 0);
25813 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25814 pipeline offset is four rather than eight. Adjust the offset
25815 accordingly. */
25816 tem = plus_constant (GET_MODE (tem), tem,
25817 TARGET_THUMB1_ONLY ? -3 : -7);
25818 tem = gen_rtx_MINUS (GET_MODE (tem),
25819 tem,
25820 gen_rtx_SYMBOL_REF (Pmode,
25821 ggc_strdup (labelpc)));
25822 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25824 else
25825 /* Output ".word .LTHUNKn". */
25826 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25828 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25829 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25831 else
25833 fputs ("\tb\t", file);
25834 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25835 if (NEED_PLT_RELOC)
25836 fputs ("(PLT)", file);
25837 fputc ('\n', file);
25840 final_end_function ();
25844 arm_emit_vector_const (FILE *file, rtx x)
25846 int i;
25847 const char * pattern;
25849 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25851 switch (GET_MODE (x))
25853 case V2SImode: pattern = "%08x"; break;
25854 case V4HImode: pattern = "%04x"; break;
25855 case V8QImode: pattern = "%02x"; break;
25856 default: gcc_unreachable ();
25859 fprintf (file, "0x");
25860 for (i = CONST_VECTOR_NUNITS (x); i--;)
25862 rtx element;
25864 element = CONST_VECTOR_ELT (x, i);
25865 fprintf (file, pattern, INTVAL (element));
25868 return 1;
25871 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25872 HFmode constant pool entries are actually loaded with ldr. */
25873 void
25874 arm_emit_fp16_const (rtx c)
25876 REAL_VALUE_TYPE r;
25877 long bits;
25879 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25880 bits = real_to_target (NULL, &r, HFmode);
25881 if (WORDS_BIG_ENDIAN)
25882 assemble_zeros (2);
25883 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25884 if (!WORDS_BIG_ENDIAN)
25885 assemble_zeros (2);
25888 const char *
25889 arm_output_load_gr (rtx *operands)
25891 rtx reg;
25892 rtx offset;
25893 rtx wcgr;
25894 rtx sum;
25896 if (!MEM_P (operands [1])
25897 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25898 || !REG_P (reg = XEXP (sum, 0))
25899 || !CONST_INT_P (offset = XEXP (sum, 1))
25900 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25901 return "wldrw%?\t%0, %1";
25903 /* Fix up an out-of-range load of a GR register. */
25904 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25905 wcgr = operands[0];
25906 operands[0] = reg;
25907 output_asm_insn ("ldr%?\t%0, %1", operands);
25909 operands[0] = wcgr;
25910 operands[1] = reg;
25911 output_asm_insn ("tmcr%?\t%0, %1", operands);
25912 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25914 return "";
25917 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25919 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25920 named arg and all anonymous args onto the stack.
25921 XXX I know the prologue shouldn't be pushing registers, but it is faster
25922 that way. */
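/* For example (illustrative): if the named arguments occupy only r0 and r1,
   nregs is 2, so *pretend_size becomes 8 and the prologue pushes the
   remaining argument registers r2 and r3 onto the stack.  */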
25924 static void
25925 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25926 machine_mode mode,
25927 tree type,
25928 int *pretend_size,
25929 int second_time ATTRIBUTE_UNUSED)
25931 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25932 int nregs;
25934 cfun->machine->uses_anonymous_args = 1;
25935 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25937 nregs = pcum->aapcs_ncrn;
25938 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25939 nregs++;
25941 else
25942 nregs = pcum->nregs;
25944 if (nregs < NUM_ARG_REGS)
25945 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25948 /* We can't rely on the caller doing the proper promotion when
25949 using APCS or ATPCS. */
25951 static bool
25952 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25954 return !TARGET_AAPCS_BASED;
25957 static machine_mode
25958 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25959 machine_mode mode,
25960 int *punsignedp ATTRIBUTE_UNUSED,
25961 const_tree fntype ATTRIBUTE_UNUSED,
25962 int for_return ATTRIBUTE_UNUSED)
25964 if (GET_MODE_CLASS (mode) == MODE_INT
25965 && GET_MODE_SIZE (mode) < 4)
25966 return SImode;
25968 return mode;
25971 /* AAPCS based ABIs use short enums by default. */
25973 static bool
25974 arm_default_short_enums (void)
25976 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25980 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25982 static bool
25983 arm_align_anon_bitfield (void)
25985 return TARGET_AAPCS_BASED;
25989 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25991 static tree
25992 arm_cxx_guard_type (void)
25994 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25998 /* The EABI says test the least significant bit of a guard variable. */
26000 static bool
26001 arm_cxx_guard_mask_bit (void)
26003 return TARGET_AAPCS_BASED;
26007 /* The EABI specifies that all array cookies are 8 bytes long. */
26009 static tree
26010 arm_get_cookie_size (tree type)
26012 tree size;
26014 if (!TARGET_AAPCS_BASED)
26015 return default_cxx_get_cookie_size (type);
26017 size = build_int_cst (sizetype, 8);
26018 return size;
26022 /* The EABI says that array cookies should also contain the element size. */
26024 static bool
26025 arm_cookie_has_size (void)
26027 return TARGET_AAPCS_BASED;
26031 /* The EABI says constructors and destructors should return a pointer to
26032 the object constructed/destroyed. */
26034 static bool
26035 arm_cxx_cdtor_returns_this (void)
26037 return TARGET_AAPCS_BASED;
26040 /* The EABI says that an inline function may never be the key
26041 method. */
26043 static bool
26044 arm_cxx_key_method_may_be_inline (void)
26046 return !TARGET_AAPCS_BASED;
26049 static void
26050 arm_cxx_determine_class_data_visibility (tree decl)
26052 if (!TARGET_AAPCS_BASED
26053 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26054 return;
26056 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26057 is exported. However, on systems without dynamic vague linkage,
26058 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26059 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26060 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26061 else
26062 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26063 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26066 static bool
26067 arm_cxx_class_data_always_comdat (void)
26069 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26070 vague linkage if the class has no key function. */
26071 return !TARGET_AAPCS_BASED;
26075 /* The EABI says __aeabi_atexit should be used to register static
26076 destructors. */
26078 static bool
26079 arm_cxx_use_aeabi_atexit (void)
26081 return TARGET_AAPCS_BASED;
26085 void
26086 arm_set_return_address (rtx source, rtx scratch)
26088 arm_stack_offsets *offsets;
26089 HOST_WIDE_INT delta;
26090 rtx addr;
26091 unsigned long saved_regs;
26093 offsets = arm_get_frame_offsets ();
26094 saved_regs = offsets->saved_regs_mask;
26096 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26097 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26098 else
26100 if (frame_pointer_needed)
26101 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26102 else
26104 /* LR will be the first saved register. */
26105 delta = offsets->outgoing_args - (offsets->frame + 4);
26108 if (delta >= 4096)
26110 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26111 GEN_INT (delta & ~4095)));
26112 addr = scratch;
26113 delta &= 4095;
26115 else
26116 addr = stack_pointer_rtx;
26118 addr = plus_constant (Pmode, addr, delta);
26120 /* The store needs to be marked as frame related in order to prevent
26121 DSE from deleting it as dead if it is based on fp. */
26122 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26123 RTX_FRAME_RELATED_P (insn) = 1;
26124 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26129 void
26130 thumb_set_return_address (rtx source, rtx scratch)
26132 arm_stack_offsets *offsets;
26133 HOST_WIDE_INT delta;
26134 HOST_WIDE_INT limit;
26135 int reg;
26136 rtx addr;
26137 unsigned long mask;
26139 emit_use (source);
26141 offsets = arm_get_frame_offsets ();
26142 mask = offsets->saved_regs_mask;
26143 if (mask & (1 << LR_REGNUM))
26145 limit = 1024;
26146 /* Find the saved regs. */
26147 if (frame_pointer_needed)
26149 delta = offsets->soft_frame - offsets->saved_args;
26150 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26151 if (TARGET_THUMB1)
26152 limit = 128;
26154 else
26156 delta = offsets->outgoing_args - offsets->saved_args;
26157 reg = SP_REGNUM;
26159 /* Allow for the stack frame. */
26160 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26161 delta -= 16;
26162 /* The link register is always the first saved register. */
26163 delta -= 4;
26165 /* Construct the address. */
26166 addr = gen_rtx_REG (SImode, reg);
26167 if (delta > limit)
26169 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26170 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26171 addr = scratch;
26173 else
26174 addr = plus_constant (Pmode, addr, delta);
26176 /* The store needs to be marked as frame related in order to prevent
26177 DSE from deleting it as dead if it is based on fp. */
26178 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26179 RTX_FRAME_RELATED_P (insn) = 1;
26180 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26182 else
26183 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26186 /* Implements target hook vector_mode_supported_p. */
26187 bool
26188 arm_vector_mode_supported_p (machine_mode mode)
26190 /* Neon also supports V2SImode, etc. listed in the clause below. */
26191 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26192 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26193 return true;
26195 if ((TARGET_NEON || TARGET_IWMMXT)
26196 && ((mode == V2SImode)
26197 || (mode == V4HImode)
26198 || (mode == V8QImode)))
26199 return true;
26201 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26202 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26203 || mode == V2HAmode))
26204 return true;
26206 return false;
26209 /* Implements target hook array_mode_supported_p. */
26211 static bool
26212 arm_array_mode_supported_p (machine_mode mode,
26213 unsigned HOST_WIDE_INT nelems)
26215 if (TARGET_NEON
26216 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26217 && (nelems >= 2 && nelems <= 4))
26218 return true;
26220 return false;
26223 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26224 registers when autovectorizing for Neon, at least until multiple vector
26225 widths are supported properly by the middle-end. */
26227 static machine_mode
26228 arm_preferred_simd_mode (machine_mode mode)
26230 if (TARGET_NEON)
26231 switch (mode)
26233 case SFmode:
26234 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26235 case SImode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26237 case HImode:
26238 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26239 case QImode:
26240 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26241 case DImode:
26242 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26243 return V2DImode;
26244 break;
26246 default:;
26249 if (TARGET_REALLY_IWMMXT)
26250 switch (mode)
26252 case SImode:
26253 return V2SImode;
26254 case HImode:
26255 return V4HImode;
26256 case QImode:
26257 return V8QImode;
26259 default:;
26262 return word_mode;
26265 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26267 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26268 using r0-r4 for function arguments, r7 for the stack frame and don't have
26269 enough left over to do doubleword arithmetic. For Thumb-2 all the
26270 potentially problematic instructions accept high registers so this is not
26271 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26272 that require many low registers. */
26273 static bool
26274 arm_class_likely_spilled_p (reg_class_t rclass)
26276 if ((TARGET_THUMB1 && rclass == LO_REGS)
26277 || rclass == CC_REG)
26278 return true;
26280 return false;
26283 /* Implements target hook small_register_classes_for_mode_p. */
26284 bool
26285 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26287 return TARGET_THUMB1;
26290 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26291 ARM insns and therefore guarantee that the shift count is modulo 256.
26292 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26293 guarantee no particular behavior for out-of-range counts. */
26295 static unsigned HOST_WIDE_INT
26296 arm_shift_truncation_mask (machine_mode mode)
26298 return mode == SImode ? 255 : 0;
26302 /* Map internal gcc register numbers to DWARF2 register numbers. */
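/* For instance (illustrative), core registers r0-r15 keep their own numbers,
   the first single-precision VFP register maps to 64, and the upper
   D registers (d16-d31) are placed in the 256+ range.  */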
26304 unsigned int
26305 arm_dbx_register_number (unsigned int regno)
26307 if (regno < 16)
26308 return regno;
26310 if (IS_VFP_REGNUM (regno))
26312 /* See comment in arm_dwarf_register_span. */
26313 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26314 return 64 + regno - FIRST_VFP_REGNUM;
26315 else
26316 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26319 if (IS_IWMMXT_GR_REGNUM (regno))
26320 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26322 if (IS_IWMMXT_REGNUM (regno))
26323 return 112 + regno - FIRST_IWMMXT_REGNUM;
26325 gcc_unreachable ();
26328 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26329 GCC models them as 64 32-bit registers, so we need to describe this to
26330 the DWARF generation code. Other registers can use the default. */
26331 static rtx
26332 arm_dwarf_register_span (rtx rtl)
26334 machine_mode mode;
26335 unsigned regno;
26336 rtx parts[16];
26337 int nregs;
26338 int i;
26340 regno = REGNO (rtl);
26341 if (!IS_VFP_REGNUM (regno))
26342 return NULL_RTX;
26344 /* XXX FIXME: The EABI defines two VFP register ranges:
26345 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26346 256-287: D0-D31
26347 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26348 corresponding D register. Until GDB supports this, we shall use the
26349 legacy encodings. We also use these encodings for D0-D15 for
26350 compatibility with older debuggers. */
26351 mode = GET_MODE (rtl);
26352 if (GET_MODE_SIZE (mode) < 8)
26353 return NULL_RTX;
26355 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26357 nregs = GET_MODE_SIZE (mode) / 4;
26358 for (i = 0; i < nregs; i += 2)
26359 if (TARGET_BIG_END)
26361 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26362 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26364 else
26366 parts[i] = gen_rtx_REG (SImode, regno + i);
26367 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26370 else
26372 nregs = GET_MODE_SIZE (mode) / 8;
26373 for (i = 0; i < nregs; i++)
26374 parts[i] = gen_rtx_REG (DImode, regno + i);
26377 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26380 #if ARM_UNWIND_INFO
26381 /* Emit unwind directives for a store-multiple instruction or stack pointer
26382 push during alignment.
26383 These should only ever be generated by the function prologue code, so
26384 expect them to have a particular form.
26385 The store-multiple instruction sometimes pushes pc as the last register,
26386 although it should not be tracked into unwind information, or for -Os
26387 sometimes pushes some dummy registers before the first register that needs
26388 to be tracked in unwind information; such dummy registers are there just
26389 to avoid separate stack adjustment, and will not be restored in the
26390 epilogue. */
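/* As an illustrative example, a prologue push such as "push {r4, r5, lr}"
   results in this function emitting the unwind directive
   ".save {r4, r5, lr}".  */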
26392 static void
26393 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26395 int i;
26396 HOST_WIDE_INT offset;
26397 HOST_WIDE_INT nregs;
26398 int reg_size;
26399 unsigned reg;
26400 unsigned lastreg;
26401 unsigned padfirst = 0, padlast = 0;
26402 rtx e;
26404 e = XVECEXP (p, 0, 0);
26405 gcc_assert (GET_CODE (e) == SET);
26407 /* First insn will adjust the stack pointer. */
26408 gcc_assert (GET_CODE (e) == SET
26409 && REG_P (SET_DEST (e))
26410 && REGNO (SET_DEST (e)) == SP_REGNUM
26411 && GET_CODE (SET_SRC (e)) == PLUS);
26413 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26414 nregs = XVECLEN (p, 0) - 1;
26415 gcc_assert (nregs);
26417 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26418 if (reg < 16)
26420 /* For -Os dummy registers can be pushed at the beginning to
26421 avoid separate stack pointer adjustment. */
26422 e = XVECEXP (p, 0, 1);
26423 e = XEXP (SET_DEST (e), 0);
26424 if (GET_CODE (e) == PLUS)
26425 padfirst = INTVAL (XEXP (e, 1));
26426 gcc_assert (padfirst == 0 || optimize_size);
26427 /* The function prologue may also push pc, but not annotate it as it is
26428 never restored. We turn this into a stack pointer adjustment. */
26429 e = XVECEXP (p, 0, nregs);
26430 e = XEXP (SET_DEST (e), 0);
26431 if (GET_CODE (e) == PLUS)
26432 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26433 else
26434 padlast = offset - 4;
26435 gcc_assert (padlast == 0 || padlast == 4);
26436 if (padlast == 4)
26437 fprintf (asm_out_file, "\t.pad #4\n");
26438 reg_size = 4;
26439 fprintf (asm_out_file, "\t.save {");
26441 else if (IS_VFP_REGNUM (reg))
26443 reg_size = 8;
26444 fprintf (asm_out_file, "\t.vsave {");
26446 else
26447 /* Unknown register type. */
26448 gcc_unreachable ();
26450 /* If the stack increment doesn't match the size of the saved registers,
26451 something has gone horribly wrong. */
26452 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26454 offset = padfirst;
26455 lastreg = 0;
26456 /* The remaining insns will describe the stores. */
26457 for (i = 1; i <= nregs; i++)
26459 /* Expect (set (mem <addr>) (reg)).
26460 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26461 e = XVECEXP (p, 0, i);
26462 gcc_assert (GET_CODE (e) == SET
26463 && MEM_P (SET_DEST (e))
26464 && REG_P (SET_SRC (e)));
26466 reg = REGNO (SET_SRC (e));
26467 gcc_assert (reg >= lastreg);
26469 if (i != 1)
26470 fprintf (asm_out_file, ", ");
26471 /* We can't use %r for vfp because we need to use the
26472 double precision register names. */
26473 if (IS_VFP_REGNUM (reg))
26474 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26475 else
26476 asm_fprintf (asm_out_file, "%r", reg);
26478 #ifdef ENABLE_CHECKING
26479 /* Check that the addresses are consecutive. */
26480 e = XEXP (SET_DEST (e), 0);
26481 if (GET_CODE (e) == PLUS)
26482 gcc_assert (REG_P (XEXP (e, 0))
26483 && REGNO (XEXP (e, 0)) == SP_REGNUM
26484 && CONST_INT_P (XEXP (e, 1))
26485 && offset == INTVAL (XEXP (e, 1)));
26486 else
26487 gcc_assert (i == 1
26488 && REG_P (e)
26489 && REGNO (e) == SP_REGNUM);
26490 offset += reg_size;
26491 #endif
26493 fprintf (asm_out_file, "}\n");
26494 if (padfirst)
26495 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26498 /* Emit unwind directives for a SET. */
26500 static void
26501 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26503 rtx e0;
26504 rtx e1;
26505 unsigned reg;
26507 e0 = XEXP (p, 0);
26508 e1 = XEXP (p, 1);
26509 switch (GET_CODE (e0))
26511 case MEM:
26512 /* Pushing a single register. */
26513 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26514 || !REG_P (XEXP (XEXP (e0, 0), 0))
26515 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26516 abort ();
26518 asm_fprintf (asm_out_file, "\t.save ");
26519 if (IS_VFP_REGNUM (REGNO (e1)))
26520 asm_fprintf(asm_out_file, "{d%d}\n",
26521 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26522 else
26523 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26524 break;
26526 case REG:
26527 if (REGNO (e0) == SP_REGNUM)
26529 /* A stack increment. */
26530 if (GET_CODE (e1) != PLUS
26531 || !REG_P (XEXP (e1, 0))
26532 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26533 || !CONST_INT_P (XEXP (e1, 1)))
26534 abort ();
26536 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26537 -INTVAL (XEXP (e1, 1)));
26539 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26541 HOST_WIDE_INT offset;
26543 if (GET_CODE (e1) == PLUS)
26545 if (!REG_P (XEXP (e1, 0))
26546 || !CONST_INT_P (XEXP (e1, 1)))
26547 abort ();
26548 reg = REGNO (XEXP (e1, 0));
26549 offset = INTVAL (XEXP (e1, 1));
26550 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26551 HARD_FRAME_POINTER_REGNUM, reg,
26552 offset);
26554 else if (REG_P (e1))
26556 reg = REGNO (e1);
26557 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26558 HARD_FRAME_POINTER_REGNUM, reg);
26560 else
26561 abort ();
26563 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26565 /* Move from sp to reg. */
26566 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26568 else if (GET_CODE (e1) == PLUS
26569 && REG_P (XEXP (e1, 0))
26570 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26571 && CONST_INT_P (XEXP (e1, 1)))
26573 /* Set reg to offset from sp. */
26574 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26575 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26577 else
26578 abort ();
26579 break;
26581 default:
26582 abort ();
26587 /* Emit unwind directives for the given insn. */
26589 static void
26590 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26592 rtx note, pat;
26593 bool handled_one = false;
26595 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26596 return;
26598 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26599 && (TREE_NOTHROW (current_function_decl)
26600 || crtl->all_throwers_are_sibcalls))
26601 return;
26603 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26604 return;
26606 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26608 switch (REG_NOTE_KIND (note))
26610 case REG_FRAME_RELATED_EXPR:
26611 pat = XEXP (note, 0);
26612 goto found;
26614 case REG_CFA_REGISTER:
26615 pat = XEXP (note, 0);
26616 if (pat == NULL)
26618 pat = PATTERN (insn);
26619 if (GET_CODE (pat) == PARALLEL)
26620 pat = XVECEXP (pat, 0, 0);
26623 /* Only emitted for IS_STACKALIGN re-alignment. */
26625 rtx dest, src;
26626 unsigned reg;
26628 src = SET_SRC (pat);
26629 dest = SET_DEST (pat);
26631 gcc_assert (src == stack_pointer_rtx);
26632 reg = REGNO (dest);
26633 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26634 reg + 0x90, reg);
26636 handled_one = true;
26637 break;
26639 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26640 to get correct dwarf information for shrink-wrap. We should not
26641 emit unwind information for it because these are used either for
26642 pretend arguments or notes to adjust sp and restore registers from
26643 stack. */
26644 case REG_CFA_DEF_CFA:
26645 case REG_CFA_ADJUST_CFA:
26646 case REG_CFA_RESTORE:
26647 return;
26649 case REG_CFA_EXPRESSION:
26650 case REG_CFA_OFFSET:
26651 /* ??? Only handling here what we actually emit. */
26652 gcc_unreachable ();
26654 default:
26655 break;
26658 if (handled_one)
26659 return;
26660 pat = PATTERN (insn);
26661 found:
26663 switch (GET_CODE (pat))
26665 case SET:
26666 arm_unwind_emit_set (asm_out_file, pat);
26667 break;
26669 case SEQUENCE:
26670 /* Store multiple. */
26671 arm_unwind_emit_sequence (asm_out_file, pat);
26672 break;
26674 default:
26675 abort();
26680 /* Output a reference from a function exception table to the type_info
26681 object X. The EABI specifies that the symbol should be relocated by
26682 an R_ARM_TARGET2 relocation. */
26684 static bool
26685 arm_output_ttype (rtx x)
26687 fputs ("\t.word\t", asm_out_file);
26688 output_addr_const (asm_out_file, x);
26689 /* Use special relocations for symbol references. */
26690 if (!CONST_INT_P (x))
26691 fputs ("(TARGET2)", asm_out_file);
26692 fputc ('\n', asm_out_file);
26694 return TRUE;
26697 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26699 static void
26700 arm_asm_emit_except_personality (rtx personality)
26702 fputs ("\t.personality\t", asm_out_file);
26703 output_addr_const (asm_out_file, personality);
26704 fputc ('\n', asm_out_file);
26707 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26709 static void
26710 arm_asm_init_sections (void)
26712 exception_section = get_unnamed_section (0, output_section_asm_op,
26713 "\t.handlerdata");
26715 #endif /* ARM_UNWIND_INFO */
26717 /* Output unwind directives for the start/end of a function. */
26719 void
26720 arm_output_fn_unwind (FILE * f, bool prologue)
26722 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26723 return;
26725 if (prologue)
26726 fputs ("\t.fnstart\n", f);
26727 else
26729 /* If this function will never be unwound, then mark it as such.
26730 The same condition is used in arm_unwind_emit to suppress
26731 the frame annotations. */
26732 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26733 && (TREE_NOTHROW (current_function_decl)
26734 || crtl->all_throwers_are_sibcalls))
26735 fputs("\t.cantunwind\n", f);
26737 fputs ("\t.fnend\n", f);
26741 static bool
26742 arm_emit_tls_decoration (FILE *fp, rtx x)
26744 enum tls_reloc reloc;
26745 rtx val;
26747 val = XVECEXP (x, 0, 0);
26748 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26750 output_addr_const (fp, val);
26752 switch (reloc)
26754 case TLS_GD32:
26755 fputs ("(tlsgd)", fp);
26756 break;
26757 case TLS_LDM32:
26758 fputs ("(tlsldm)", fp);
26759 break;
26760 case TLS_LDO32:
26761 fputs ("(tlsldo)", fp);
26762 break;
26763 case TLS_IE32:
26764 fputs ("(gottpoff)", fp);
26765 break;
26766 case TLS_LE32:
26767 fputs ("(tpoff)", fp);
26768 break;
26769 case TLS_DESCSEQ:
26770 fputs ("(tlsdesc)", fp);
26771 break;
26772 default:
26773 gcc_unreachable ();
26776 switch (reloc)
26778 case TLS_GD32:
26779 case TLS_LDM32:
26780 case TLS_IE32:
26781 case TLS_DESCSEQ:
26782 fputs (" + (. - ", fp);
26783 output_addr_const (fp, XVECEXP (x, 0, 2));
26784 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26785 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26786 output_addr_const (fp, XVECEXP (x, 0, 3));
26787 fputc (')', fp);
26788 break;
26789 default:
26790 break;
26793 return TRUE;
26796 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26798 static void
26799 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26801 gcc_assert (size == 4);
26802 fputs ("\t.word\t", file);
26803 output_addr_const (file, x);
26804 fputs ("(tlsldo)", file);
26807 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26809 static bool
26810 arm_output_addr_const_extra (FILE *fp, rtx x)
26812 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26813 return arm_emit_tls_decoration (fp, x);
26814 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26816 char label[256];
26817 int labelno = INTVAL (XVECEXP (x, 0, 0));
26819 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26820 assemble_name_raw (fp, label);
26822 return TRUE;
26824 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26826 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26827 if (GOT_PCREL)
26828 fputs ("+.", fp);
26829 fputs ("-(", fp);
26830 output_addr_const (fp, XVECEXP (x, 0, 0));
26831 fputc (')', fp);
26832 return TRUE;
26834 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26836 output_addr_const (fp, XVECEXP (x, 0, 0));
26837 if (GOT_PCREL)
26838 fputs ("+.", fp);
26839 fputs ("-(", fp);
26840 output_addr_const (fp, XVECEXP (x, 0, 1));
26841 fputc (')', fp);
26842 return TRUE;
26844 else if (GET_CODE (x) == CONST_VECTOR)
26845 return arm_emit_vector_const (fp, x);
26847 return FALSE;
26850 /* Output assembly for a shift instruction.
26851 SET_FLAGS determines how the instruction modifies the condition codes.
26852 0 - Do not set condition codes.
26853 1 - Set condition codes.
26854 2 - Use smallest instruction. */
26855 const char *
26856 arm_output_shift(rtx * operands, int set_flags)
26858 char pattern[100];
26859 static const char flag_chars[3] = {'?', '.', '!'};
26860 const char *shift;
26861 HOST_WIDE_INT val;
26862 char c;
26864 c = flag_chars[set_flags];
26865 if (TARGET_UNIFIED_ASM)
26867 shift = shift_op(operands[3], &val);
26868 if (shift)
26870 if (val != -1)
26871 operands[2] = GEN_INT(val);
26872 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26874 else
26875 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26877 else
26878 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26879 output_asm_insn (pattern, operands);
26880 return "";
26883 /* Output assembly for a WMMX immediate shift instruction. */
26884 const char *
26885 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26887 int shift = INTVAL (operands[2]);
26888 char templ[50];
26889 machine_mode opmode = GET_MODE (operands[0]);
26891 gcc_assert (shift >= 0);
26893 /* Handle a shift value that is out of range for the register form:
26894 greater than 63 (D qualifier), 31 (W qualifier) or 15 (H qualifier). */
26895 if (((opmode == V4HImode) && (shift > 15))
26896 || ((opmode == V2SImode) && (shift > 31))
26897 || ((opmode == DImode) && (shift > 63)))
26899 if (wror_or_wsra)
26901 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26902 output_asm_insn (templ, operands);
26903 if (opmode == DImode)
26905 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26906 output_asm_insn (templ, operands);
26909 else
26911 /* The destination register will contain all zeros. */
26912 sprintf (templ, "wzero\t%%0");
26913 output_asm_insn (templ, operands);
26915 return "";
26918 if ((opmode == DImode) && (shift > 32))
26920 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26921 output_asm_insn (templ, operands);
26922 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26923 output_asm_insn (templ, operands);
26925 else
26927 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26928 output_asm_insn (templ, operands);
26930 return "";
26933 /* Output assembly for a WMMX tinsr instruction. */
26934 const char *
26935 arm_output_iwmmxt_tinsr (rtx *operands)
26937 int mask = INTVAL (operands[3]);
26938 int i;
26939 char templ[50];
26940 int units = mode_nunits[GET_MODE (operands[0])];
26941 gcc_assert ((mask & (mask - 1)) == 0);
26942 for (i = 0; i < units; ++i)
26944 if ((mask & 0x01) == 1)
26946 break;
26948 mask >>= 1;
26950 gcc_assert (i < units);
26952 switch (GET_MODE (operands[0]))
26954 case V8QImode:
26955 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26956 break;
26957 case V4HImode:
26958 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26959 break;
26960 case V2SImode:
26961 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26962 break;
26963 default:
26964 gcc_unreachable ();
26965 break;
26967 output_asm_insn (templ, operands);
26969 return "";
26972 /* Output a Thumb-1 casesi dispatch sequence. */
26973 const char *
26974 thumb1_output_casesi (rtx *operands)
26976 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26978 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26980 switch (GET_MODE(diff_vec))
26982 case QImode:
26983 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26984 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26985 case HImode:
26986 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26987 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26988 case SImode:
26989 return "bl\t%___gnu_thumb1_case_si";
26990 default:
26991 gcc_unreachable ();
26995 /* Output a Thumb-2 casesi instruction. */
26996 const char *
26997 thumb2_output_casesi (rtx *operands)
26999 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27001 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27003 output_asm_insn ("cmp\t%0, %1", operands);
27004 output_asm_insn ("bhi\t%l3", operands);
27005 switch (GET_MODE(diff_vec))
27007 case QImode:
27008 return "tbb\t[%|pc, %0]";
27009 case HImode:
27010 return "tbh\t[%|pc, %0, lsl #1]";
27011 case SImode:
27012 if (flag_pic)
27014 output_asm_insn ("adr\t%4, %l2", operands);
27015 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27016 output_asm_insn ("add\t%4, %4, %5", operands);
27017 return "bx\t%4";
27019 else
27021 output_asm_insn ("adr\t%4, %l2", operands);
27022 return "ldr\t%|pc, [%4, %0, lsl #2]";
27024 default:
27025 gcc_unreachable ();
27029 /* Most ARM cores are single issue, but some newer ones can dual issue.
27030 The scheduler descriptions rely on this being correct. */
27031 static int
27032 arm_issue_rate (void)
27034 switch (arm_tune)
27036 case cortexa15:
27037 case cortexa57:
27038 return 3;
27040 case cortexm7:
27041 case cortexr4:
27042 case cortexr4f:
27043 case cortexr5:
27044 case genericv7a:
27045 case cortexa5:
27046 case cortexa7:
27047 case cortexa8:
27048 case cortexa9:
27049 case cortexa12:
27050 case cortexa17:
27051 case cortexa53:
27052 case fa726te:
27053 case marvell_pj4:
27054 return 2;
27056 default:
27057 return 1;
27061 const char *
27062 arm_mangle_type (const_tree type)
27064 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27065 has to be mangled as if it is in the "std" namespace. */
27066 if (TARGET_AAPCS_BASED
27067 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27068 return "St9__va_list";
27070 /* Half-precision float. */
27071 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27072 return "Dh";
27074 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27075 builtin type. */
27076 if (TYPE_NAME (type) != NULL)
27077 return arm_mangle_builtin_type (type);
27079 /* Use the default mangling. */
27080 return NULL;
27083 /* Order of allocation of core registers for Thumb: this allocation is
27084 written over the corresponding initial entries of the array
27085 initialized with REG_ALLOC_ORDER. We allocate all low registers
27086 first. Saving and restoring a low register is usually cheaper than
27087 using a call-clobbered high register. */
27089 static const int thumb_core_reg_alloc_order[] =
27091 3, 2, 1, 0, 4, 5, 6, 7,
27092 14, 12, 8, 9, 10, 11
27095 /* Adjust register allocation order when compiling for Thumb. */
27097 void
27098 arm_order_regs_for_local_alloc (void)
27100 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27101 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27102 if (TARGET_THUMB)
27103 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27104 sizeof (thumb_core_reg_alloc_order));
27107 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27109 bool
27110 arm_frame_pointer_required (void)
27112 return (cfun->has_nonlocal_label
27113 || SUBTARGET_FRAME_POINTER_REQUIRED
27114 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27117 /* Only Thumb-1 lacks conditional execution, so return true if
27118 the target is not Thumb-1. */
27119 static bool
27120 arm_have_conditional_execution (void)
27122 return !TARGET_THUMB1;
27125 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27126 static HOST_WIDE_INT
27127 arm_vector_alignment (const_tree type)
27129 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27131 if (TARGET_AAPCS_BASED)
27132 align = MIN (align, 64);
27134 return align;
27137 static unsigned int
27138 arm_autovectorize_vector_sizes (void)
27140 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27143 static bool
27144 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27146 /* Vectors which aren't in packed structures will not be less aligned than
27147 the natural alignment of their element type, so this is safe. */
27148 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27149 return !is_packed;
27151 return default_builtin_vector_alignment_reachable (type, is_packed);
27154 static bool
27155 arm_builtin_support_vector_misalignment (machine_mode mode,
27156 const_tree type, int misalignment,
27157 bool is_packed)
27159 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27161 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27163 if (is_packed)
27164 return align == 1;
27166 /* If the misalignment is unknown, we should be able to handle the access
27167 so long as it is not to a member of a packed data structure. */
27168 if (misalignment == -1)
27169 return true;
27171 /* Return true if the misalignment is a multiple of the natural alignment
27172 of the vector's element type. This is probably always going to be
27173 true in practice, since we've already established that this isn't a
27174 packed access. */
27175 return ((misalignment % align) == 0);
27178 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27179 is_packed);
27182 static void
27183 arm_conditional_register_usage (void)
27185 int regno;
27187 if (TARGET_THUMB1 && optimize_size)
27189 /* When optimizing for size on Thumb-1, it's better not
27190 to use the HI regs, because of the overhead of
27191 stacking them. */
27192 for (regno = FIRST_HI_REGNUM;
27193 regno <= LAST_HI_REGNUM; ++regno)
27194 fixed_regs[regno] = call_used_regs[regno] = 1;
27197 /* The link register can be clobbered by any branch insn,
27198 but we have no way to track that at present, so mark
27199 it as unavailable. */
27200 if (TARGET_THUMB1)
27201 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27203 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27205 /* VFPv3 registers are disabled when earlier VFP
27206 versions are selected due to the definition of
27207 LAST_VFP_REGNUM. */
27208 for (regno = FIRST_VFP_REGNUM;
27209 regno <= LAST_VFP_REGNUM; ++ regno)
27211 fixed_regs[regno] = 0;
27212 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27213 || regno >= FIRST_VFP_REGNUM + 32;
27217 if (TARGET_REALLY_IWMMXT)
27219 regno = FIRST_IWMMXT_GR_REGNUM;
27220 /* The 2002/10/09 revision of the XScale ABI has wCG0
27221 and wCG1 as call-preserved registers. The 2002/11/21
27222 revision changed this so that all wCG registers are
27223 scratch registers. */
27224 for (regno = FIRST_IWMMXT_GR_REGNUM;
27225 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27226 fixed_regs[regno] = 0;
27227 /* The XScale ABI has wR0 - wR9 as scratch registers,
27228 the rest as call-preserved registers. */
27229 for (regno = FIRST_IWMMXT_REGNUM;
27230 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27232 fixed_regs[regno] = 0;
27233 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27237 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27239 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27240 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 else if (TARGET_APCS_STACK)
27244 fixed_regs[10] = 1;
27245 call_used_regs[10] = 1;
27247 /* -mcaller-super-interworking reserves r11 for calls to
27248 _interwork_r11_call_via_rN(). Making the register global
27249 is an easy way of ensuring that it remains valid for all
27250 calls. */
27251 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27252 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27254 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27255 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27256 if (TARGET_CALLER_INTERWORKING)
27257 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27259 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27262 static reg_class_t
27263 arm_preferred_rename_class (reg_class_t rclass)
27265 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27266 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27267 and code size can be reduced. */
27268 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27269 return LO_REGS;
27270 else
27271 return NO_REGS;
27274 /* Compute the attribute "length" of insn "*push_multi".
27275 So this function MUST be kept in sync with that insn pattern. */
27277 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27279 int i, regno, hi_reg;
27280 int num_saves = XVECLEN (parallel_op, 0);
27282 /* ARM mode. */
27283 if (TARGET_ARM)
27284 return 4;
27285 /* Thumb1 mode. */
27286 if (TARGET_THUMB1)
27287 return 2;
27289 /* Thumb2 mode. */
27290 regno = REGNO (first_op);
27291 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292 for (i = 1; i < num_saves && !hi_reg; i++)
27294 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27295 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27298 if (!hi_reg)
27299 return 2;
27300 return 4;
27303 /* Compute the number of instructions emitted by output_move_double. */
27305 arm_count_output_move_double_insns (rtx *operands)
27307 int count;
27308 rtx ops[2];
27309 /* output_move_double may modify the operands array, so call it
27310 here on a copy of the array. */
27311 ops[0] = operands[0];
27312 ops[1] = operands[1];
27313 output_move_double (ops, false, &count);
27314 return count;
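/* Descriptive summary (illustrative): if OPERAND is a CONST_DOUBLE whose
   reciprocal is an exact power of two that fits in 32 bits, the function
   below returns log2 of that reciprocal (e.g. 0.125 -> 3); otherwise it
   returns 0.  */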
27318 vfp3_const_double_for_fract_bits (rtx operand)
27320 REAL_VALUE_TYPE r0;
27322 if (!CONST_DOUBLE_P (operand))
27323 return 0;
27325 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27326 if (exact_real_inverse (DFmode, &r0))
27328 if (exact_real_truncate (DFmode, &r0))
27330 HOST_WIDE_INT value = real_to_integer (&r0);
27331 value = value & 0xffffffff;
27332 if ((value != 0) && ( (value & (value - 1)) == 0))
27333 return int_log2 (value);
27336 return 0;
27340 vfp3_const_double_for_bits (rtx operand)
27342 REAL_VALUE_TYPE r0;
27344 if (!CONST_DOUBLE_P (operand))
27345 return 0;
27347 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27348 if (exact_real_truncate (DFmode, &r0))
27350 HOST_WIDE_INT value = real_to_integer (&r0);
27351 value = value & 0xffffffff;
27352 if ((value != 0) && ( (value & (value - 1)) == 0))
27353 return int_log2 (value);
27356 return 0;
27359 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27361 static void
27362 arm_pre_atomic_barrier (enum memmodel model)
27364 if (need_atomic_barrier_p (model, true))
27365 emit_insn (gen_memory_barrier ());
27368 static void
27369 arm_post_atomic_barrier (enum memmodel model)
27371 if (need_atomic_barrier_p (model, false))
27372 emit_insn (gen_memory_barrier ());
27375 /* Emit the load-exclusive and store-exclusive instructions.
27376 Use acquire and release versions if necessary. */
27378 static void
27379 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27381 rtx (*gen) (rtx, rtx);
27383 if (acq)
27385 switch (mode)
27387 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27388 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27389 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27390 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27391 default:
27392 gcc_unreachable ();
27395 else
27397 switch (mode)
27399 case QImode: gen = gen_arm_load_exclusiveqi; break;
27400 case HImode: gen = gen_arm_load_exclusivehi; break;
27401 case SImode: gen = gen_arm_load_exclusivesi; break;
27402 case DImode: gen = gen_arm_load_exclusivedi; break;
27403 default:
27404 gcc_unreachable ();
27408 emit_insn (gen (rval, mem));
27411 static void
27412 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27413 rtx mem, bool rel)
27415 rtx (*gen) (rtx, rtx, rtx);
27417 if (rel)
27419 switch (mode)
27421 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27422 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27423 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27424 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27425 default:
27426 gcc_unreachable ();
27429 else
27431 switch (mode)
27433 case QImode: gen = gen_arm_store_exclusiveqi; break;
27434 case HImode: gen = gen_arm_store_exclusivehi; break;
27435 case SImode: gen = gen_arm_store_exclusivesi; break;
27436 case DImode: gen = gen_arm_store_exclusivedi; break;
27437 default:
27438 gcc_unreachable ();
27442 emit_insn (gen (bval, rval, mem));
27445 /* Mark the previous jump instruction as unlikely. */
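/* The note value REG_BR_PROB_BASE / 100 - 1 corresponds to a taken
   probability of just under 1%.  */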
27447 static void
27448 emit_unlikely_jump (rtx insn)
27450 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27452 insn = emit_jump_insn (insn);
27453 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 /* Expand a compare and swap pattern. */
27458 void
27459 arm_expand_compare_and_swap (rtx operands[])
27461 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27462 machine_mode mode;
27463 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27465 bval = operands[0];
27466 rval = operands[1];
27467 mem = operands[2];
27468 oldval = operands[3];
27469 newval = operands[4];
27470 is_weak = operands[5];
27471 mod_s = operands[6];
27472 mod_f = operands[7];
27473 mode = GET_MODE (mem);
27475 /* Normally the succ memory model must be stronger than fail, but in the
27476 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27477 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27479 if (TARGET_HAVE_LDACQ
27480 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27481 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27482 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27484 switch (mode)
27486 case QImode:
27487 case HImode:
27488 /* For narrow modes, we're going to perform the comparison in SImode,
27489 so do the zero-extension now. */
27490 rval = gen_reg_rtx (SImode);
27491 oldval = convert_modes (SImode, mode, oldval, true);
27492 /* FALLTHRU */
27494 case SImode:
27495 /* Force the value into a register if needed. We waited until after
27496 the zero-extension above to do this properly. */
27497 if (!arm_add_operand (oldval, SImode))
27498 oldval = force_reg (SImode, oldval);
27499 break;
27501 case DImode:
27502 if (!cmpdi_operand (oldval, mode))
27503 oldval = force_reg (mode, oldval);
27504 break;
27506 default:
27507 gcc_unreachable ();
27510 switch (mode)
27512 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27513 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27514 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27515 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27516 default:
27517 gcc_unreachable ();
27520 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27522 if (mode == QImode || mode == HImode)
27523 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27525 /* In all cases, we arrange for success to be signaled by Z set.
27526 This arrangement allows for the boolean result to be used directly
27527 in a subsequent branch, post optimization. */
27528 x = gen_rtx_REG (CCmode, CC_REGNUM);
27529 x = gen_rtx_EQ (SImode, x, const0_rtx);
27530 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27533 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27534 another memory store between the load-exclusive and store-exclusive can
27535 reset the monitor from Exclusive to Open state. This means we must wait
27536 until after reload to split the pattern, lest we get a register spill in
27537 the middle of the atomic sequence. */
27539 void
27540 arm_split_compare_and_swap (rtx operands[])
27542 rtx rval, mem, oldval, newval, scratch;
27543 machine_mode mode;
27544 enum memmodel mod_s, mod_f;
27545 bool is_weak;
27546 rtx_code_label *label1, *label2;
27547 rtx x, cond;
27549 rval = operands[0];
27550 mem = operands[1];
27551 oldval = operands[2];
27552 newval = operands[3];
27553 is_weak = (operands[4] != const0_rtx);
27554 mod_s = (enum memmodel) INTVAL (operands[5]);
27555 mod_f = (enum memmodel) INTVAL (operands[6]);
27556 scratch = operands[7];
27557 mode = GET_MODE (mem);
27559 bool use_acquire = TARGET_HAVE_LDACQ
27560 && !(mod_s == MEMMODEL_RELAXED
27561 || mod_s == MEMMODEL_CONSUME
27562 || mod_s == MEMMODEL_RELEASE);
27564 bool use_release = TARGET_HAVE_LDACQ
27565 && !(mod_s == MEMMODEL_RELAXED
27566 || mod_s == MEMMODEL_CONSUME
27567 || mod_s == MEMMODEL_ACQUIRE);
27569 /* Checks whether a barrier is needed and emits one accordingly. */
27570 if (!(use_acquire || use_release))
27571 arm_pre_atomic_barrier (mod_s);
27573 label1 = NULL;
27574 if (!is_weak)
27576 label1 = gen_label_rtx ();
27577 emit_label (label1);
27579 label2 = gen_label_rtx ();
27581 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27583 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27584 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27585 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27586 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27587 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27589 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27591 /* Weak or strong, we want EQ to be true for success, so that we
27592 match the flags that we got from the compare above. */
27593 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27594 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27595 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27597 if (!is_weak)
27599 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27600 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27601 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27602 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27605 if (mod_f != MEMMODEL_RELAXED)
27606 emit_label (label2);
27608 /* Checks whether a barrier is needed and emits one accordingly. */
27609 if (!(use_acquire || use_release))
27610 arm_post_atomic_barrier (mod_s);
27612 if (mod_f == MEMMODEL_RELAXED)
27613 emit_label (label2);
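/* Split an atomic operation pattern.  Emit the load-exclusive / operate /
   store-exclusive retry loop implementing operation CODE of VALUE on MEM.
   OLD_OUT and NEW_OUT, if non-null, receive the previous memory contents
   and the newly computed value.  COND is a scratch register that receives
   the store-exclusive status and drives the retry branch.  MODEL_RTX holds
   the memory model, which decides whether acquire/release instructions or
   explicit barriers are used.  */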
27616 void
27617 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27618 rtx value, rtx model_rtx, rtx cond)
27620 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27621 machine_mode mode = GET_MODE (mem);
27622 machine_mode wmode = (mode == DImode ? DImode : SImode);
27623 rtx_code_label *label;
27624 rtx x;
27626 bool use_acquire = TARGET_HAVE_LDACQ
27627 && !(model == MEMMODEL_RELAXED
27628 || model == MEMMODEL_CONSUME
27629 || model == MEMMODEL_RELEASE);
27631 bool use_release = TARGET_HAVE_LDACQ
27632 && !(model == MEMMODEL_RELAXED
27633 || model == MEMMODEL_CONSUME
27634 || model == MEMMODEL_ACQUIRE);
27636 /* Checks whether a barrier is needed and emits one accordingly. */
27637 if (!(use_acquire || use_release))
27638 arm_pre_atomic_barrier (model);
27640 label = gen_label_rtx ();
27641 emit_label (label);
27643 if (new_out)
27644 new_out = gen_lowpart (wmode, new_out);
27645 if (old_out)
27646 old_out = gen_lowpart (wmode, old_out);
27647 else
27648 old_out = new_out;
27649 value = simplify_gen_subreg (wmode, value, mode, 0);
27651 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27653 switch (code)
27655 case SET:
27656 new_out = value;
27657 break;
27659 case NOT:
27660 x = gen_rtx_AND (wmode, old_out, value);
27661 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27662 x = gen_rtx_NOT (wmode, new_out);
27663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27664 break;
27666 case MINUS:
27667 if (CONST_INT_P (value))
27669 value = GEN_INT (-INTVAL (value));
27670 code = PLUS;
27672 /* FALLTHRU */
27674 case PLUS:
27675 if (mode == DImode)
27677 /* DImode plus/minus need to clobber flags. */
27678 /* The adddi3 and subdi3 patterns are incorrectly written so that
27679 they require matching operands, even when we could easily support
27680 three operands. Thankfully, this can be fixed up post-splitting,
27681 as the individual add+adc patterns do accept three operands and
27682 post-reload cprop can make these moves go away. */
27683 emit_move_insn (new_out, old_out);
27684 if (code == PLUS)
27685 x = gen_adddi3 (new_out, new_out, value);
27686 else
27687 x = gen_subdi3 (new_out, new_out, value);
27688 emit_insn (x);
27689 break;
27691 /* FALLTHRU */
27693 default:
27694 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27695 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27696 break;
27699 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27700 use_release);
27702 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27703 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27705 /* Checks whether a barrier is needed and emits one accordingly. */
27706 if (!(use_acquire || use_release))
27707 arm_post_atomic_barrier (model);
27710 #define MAX_VECT_LEN 16
27712 struct expand_vec_perm_d
27714 rtx target, op0, op1;
27715 unsigned char perm[MAX_VECT_LEN];
27716 machine_mode vmode;
27717 unsigned char nelt;
27718 bool one_vector_p;
27719 bool testing_p;
27722 /* Generate a variable permutation. */
27724 static void
27725 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27727 machine_mode vmode = GET_MODE (target);
27728 bool one_vector_p = rtx_equal_p (op0, op1);
27730 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27731 gcc_checking_assert (GET_MODE (op0) == vmode);
27732 gcc_checking_assert (GET_MODE (op1) == vmode);
27733 gcc_checking_assert (GET_MODE (sel) == vmode);
27734 gcc_checking_assert (TARGET_NEON);
27736 if (one_vector_p)
27738 if (vmode == V8QImode)
27739 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27740 else
27741 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27743 else
27745 rtx pair;
27747 if (vmode == V8QImode)
27749 pair = gen_reg_rtx (V16QImode);
27750 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27751 pair = gen_lowpart (TImode, pair);
27752 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27754 else
27756 pair = gen_reg_rtx (OImode);
27757 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27758 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
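/* Expand a variable vector permutation: select bytes of OP0/OP1 into TARGET
   according to the indices in register SEL.  The indices are first reduced
   modulo the number of input elements, since the VTBL instruction does not
   use a modulo index itself (see below).  */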
27763 void
27764 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27766 machine_mode vmode = GET_MODE (target);
27767 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27768 bool one_vector_p = rtx_equal_p (op0, op1);
27769 rtx rmask[MAX_VECT_LEN], mask;
27771 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27772 numbering of elements for big-endian, we must reverse the order. */
27773 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27775 /* The VTBL instruction does not use a modulo index, so we must take care
27776 of that ourselves. */
27777 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27778 for (i = 0; i < nelt; ++i)
27779 rmask[i] = mask;
27780 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27781 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27783 arm_expand_vec_perm_1 (target, op0, op1, sel);
27786 /* Generate or test for an insn that supports a constant permutation. */
27788 /* Recognize patterns for the VUZP insns. */
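/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 2 4 6} (even elements, giving {a0 a2 b0 b2}) and the
   selector {1 3 5 7} (odd elements) are both matched here.  */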
27790 static bool
27791 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27793 unsigned int i, odd, mask, nelt = d->nelt;
27794 rtx out0, out1, in0, in1, x;
27795 rtx (*gen)(rtx, rtx, rtx, rtx);
27797 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27798 return false;
27800 /* Note that these are little-endian tests. Adjust for big-endian later. */
27801 if (d->perm[0] == 0)
27802 odd = 0;
27803 else if (d->perm[0] == 1)
27804 odd = 1;
27805 else
27806 return false;
27807 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27809 for (i = 0; i < nelt; i++)
27811 unsigned elt = (i * 2 + odd) & mask;
27812 if (d->perm[i] != elt)
27813 return false;
27816 /* Success! */
27817 if (d->testing_p)
27818 return true;
27820 switch (d->vmode)
27822 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27823 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27824 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27825 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27826 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27827 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27828 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27829 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27830 default:
27831 gcc_unreachable ();
27834 in0 = d->op0;
27835 in1 = d->op1;
27836 if (BYTES_BIG_ENDIAN)
27838 x = in0, in0 = in1, in1 = x;
27839 odd = !odd;
27842 out0 = d->target;
27843 out1 = gen_reg_rtx (d->vmode);
27844 if (odd)
27845 x = out0, out0 = out1, out1 = x;
27847 emit_insn (gen (out0, in0, in1, out1));
27848 return true;
27851 /* Recognize patterns for the VZIP insns. */
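/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 4 1 5} (interleave the low halves, giving {a0 b0 a1 b1})
   and the selector {2 6 3 7} (interleave the high halves) are matched here.  */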
27853 static bool
27854 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27856 unsigned int i, high, mask, nelt = d->nelt;
27857 rtx out0, out1, in0, in1, x;
27858 rtx (*gen)(rtx, rtx, rtx, rtx);
27860 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27861 return false;
27863 /* Note that these are little-endian tests. Adjust for big-endian later. */
27864 high = nelt / 2;
27865 if (d->perm[0] == high)
27867 else if (d->perm[0] == 0)
27868 high = 0;
27869 else
27870 return false;
27871 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27873 for (i = 0; i < nelt / 2; i++)
27875 unsigned elt = (i + high) & mask;
27876 if (d->perm[i * 2] != elt)
27877 return false;
27878 elt = (elt + nelt) & mask;
27879 if (d->perm[i * 2 + 1] != elt)
27880 return false;
27883 /* Success! */
27884 if (d->testing_p)
27885 return true;
27887 switch (d->vmode)
27889 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27890 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27891 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27892 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27893 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27894 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27895 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27896 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27897 default:
27898 gcc_unreachable ();
27901 in0 = d->op0;
27902 in1 = d->op1;
27903 if (BYTES_BIG_ENDIAN)
27905 x = in0, in0 = in1, in1 = x;
27906 high = !high;
27909 out0 = d->target;
27910 out1 = gen_reg_rtx (d->vmode);
27911 if (high)
27912 x = out0, out0 = out1, out1 = x;
27914 emit_insn (gen (out0, in0, in1, out1));
27915 return true;
27918 /* Recognize patterns for the VREV insns. */
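/* Only single-operand permutations are handled here.  For example, a
   V4HImode selector of {3 2 1 0} (diff == 3) maps to vrev64.16, and
   {1 0 3 2} (diff == 1) maps to vrev32.16.  */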
27920 static bool
27921 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27923 unsigned int i, j, diff, nelt = d->nelt;
27924 rtx (*gen)(rtx, rtx);
27926 if (!d->one_vector_p)
27927 return false;
27929 diff = d->perm[0];
27930 switch (diff)
27932 case 7:
27933 switch (d->vmode)
27935 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27936 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27937 default:
27938 return false;
27940 break;
27941 case 3:
27942 switch (d->vmode)
27944 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27945 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27946 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27947 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27948 default:
27949 return false;
27951 break;
27952 case 1:
27953 switch (d->vmode)
27955 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27956 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27957 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27958 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27959 case V4SImode: gen = gen_neon_vrev64v4si; break;
27960 case V2SImode: gen = gen_neon_vrev64v2si; break;
27961 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27962 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27963 default:
27964 return false;
27966 break;
27967 default:
27968 return false;
27971 for (i = 0; i < nelt ; i += diff + 1)
27972 for (j = 0; j <= diff; j += 1)
27974 /* This is guaranteed to be true as the value of diff
27975 is 7, 3, 1 and we should have enough elements in the
27976 queue to generate this. Getting a vector mask with a
27977 value of diff other than these values implies that
27978 something is wrong by the time we get here. */
27979 gcc_assert (i + j < nelt);
27980 if (d->perm[i + j] != i + diff - j)
27981 return false;
27984 /* Success! */
27985 if (d->testing_p)
27986 return true;
27988 emit_insn (gen (d->target, d->op0));
27989 return true;
27992 /* Recognize patterns for the VTRN insns. */
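/* For example, with V4SImode operands {a0 a1 a2 a3} and {b0 b1 b2 b3},
   the selector {0 4 2 6} (giving {a0 b0 a2 b2}) and the selector
   {1 5 3 7} (giving {a1 b1 a3 b3}) are matched here.  */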
27994 static bool
27995 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27997 unsigned int i, odd, mask, nelt = d->nelt;
27998 rtx out0, out1, in0, in1, x;
27999 rtx (*gen)(rtx, rtx, rtx, rtx);
28001 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28002 return false;
28004 /* Note that these are little-endian tests. Adjust for big-endian later. */
28005 if (d->perm[0] == 0)
28006 odd = 0;
28007 else if (d->perm[0] == 1)
28008 odd = 1;
28009 else
28010 return false;
28011 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28013 for (i = 0; i < nelt; i += 2)
28015 if (d->perm[i] != i + odd)
28016 return false;
28017 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28018 return false;
28021 /* Success! */
28022 if (d->testing_p)
28023 return true;
28025 switch (d->vmode)
28027 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28028 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28029 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28030 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28031 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28032 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28033 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28034 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28035 default:
28036 gcc_unreachable ();
28039 in0 = d->op0;
28040 in1 = d->op1;
28041 if (BYTES_BIG_ENDIAN)
28043 x = in0, in0 = in1, in1 = x;
28044 odd = !odd;
28047 out0 = d->target;
28048 out1 = gen_reg_rtx (d->vmode);
28049 if (odd)
28050 x = out0, out0 = out1, out1 = x;
28052 emit_insn (gen (out0, in0, in1, out1));
28053 return true;
28056 /* Recognize patterns for the VEXT insns. */
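/* The selector must pick consecutive elements starting at some index.
   For example, {1 2 3 4} for V4SImode maps to a vext.32 with #1,
   producing {a1 a2 a3 b0}.  A wrap-around is only accepted in the
   one-operand case (a rotation).  */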
28058 static bool
28059 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28061 unsigned int i, nelt = d->nelt;
28062 rtx (*gen) (rtx, rtx, rtx, rtx);
28063 rtx offset;
28065 unsigned int location;
28067 unsigned int next = d->perm[0] + 1;
28069 /* TODO: Handle GCC's numbering of elements for big-endian. */
28070 if (BYTES_BIG_ENDIAN)
28071 return false;
28073 /* Check if the extracted indexes are increasing by one. */
28074 for (i = 1; i < nelt; next++, i++)
28076 /* If we hit the most significant element of the 2nd vector in
28077 the previous iteration, no need to test further. */
28078 if (next == 2 * nelt)
28079 return false;
28081 /* If we are operating on only one vector: it could be a
28082 rotation. If there are only two elements of size < 64, let
28083 arm_evpc_neon_vrev catch it. */
28084 if (d->one_vector_p && (next == nelt))
28086 if ((nelt == 2) && (d->vmode != V2DImode))
28087 return false;
28088 else
28089 next = 0;
28092 if (d->perm[i] != next)
28093 return false;
28096 location = d->perm[0];
28098 switch (d->vmode)
28100 case V16QImode: gen = gen_neon_vextv16qi; break;
28101 case V8QImode: gen = gen_neon_vextv8qi; break;
28102 case V4HImode: gen = gen_neon_vextv4hi; break;
28103 case V8HImode: gen = gen_neon_vextv8hi; break;
28104 case V2SImode: gen = gen_neon_vextv2si; break;
28105 case V4SImode: gen = gen_neon_vextv4si; break;
28106 case V2SFmode: gen = gen_neon_vextv2sf; break;
28107 case V4SFmode: gen = gen_neon_vextv4sf; break;
28108 case V2DImode: gen = gen_neon_vextv2di; break;
28109 default:
28110 return false;
28113 /* Success! */
28114 if (d->testing_p)
28115 return true;
28117 offset = GEN_INT (location);
28118 emit_insn (gen (d->target, d->op0, d->op1, offset));
28119 return true;
28122 /* The NEON VTBL instruction is a fully variable permutation that's even
28123 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28124 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28125 can do slightly better by expanding this as a constant where we don't
28126 have to apply a mask. */
28128 static bool
28129 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28131 rtx rperm[MAX_VECT_LEN], sel;
28132 machine_mode vmode = d->vmode;
28133 unsigned int i, nelt = d->nelt;
28135 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28136 numbering of elements for big-endian, we must reverse the order. */
28137 if (BYTES_BIG_ENDIAN)
28138 return false;
28140 if (d->testing_p)
28141 return true;
28143 /* Generic code will try constant permutation twice. Once with the
28144 original mode and again with the elements lowered to QImode.
28145 So wait and don't do the selector expansion ourselves. */
28146 if (vmode != V8QImode && vmode != V16QImode)
28147 return false;
28149 for (i = 0; i < nelt; ++i)
28150 rperm[i] = GEN_INT (d->perm[i]);
28151 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28152 sel = force_reg (vmode, sel);
28154 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28155 return true;
28158 static bool
28159 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28161 /* Check if the input mask matches vext before reordering the
28162 operands. */
28163 if (TARGET_NEON)
28164 if (arm_evpc_neon_vext (d))
28165 return true;
28167 /* The pattern matching functions above are written to look for a small
28168 number to begin the sequence (0, 1, N/2). If we begin with an index
28169 from the second operand, we can swap the operands. */
28170 if (d->perm[0] >= d->nelt)
28172 unsigned i, nelt = d->nelt;
28173 rtx x;
28175 for (i = 0; i < nelt; ++i)
28176 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28178 x = d->op0;
28179 d->op0 = d->op1;
28180 d->op1 = x;
28183 if (TARGET_NEON)
28185 if (arm_evpc_neon_vuzp (d))
28186 return true;
28187 if (arm_evpc_neon_vzip (d))
28188 return true;
28189 if (arm_evpc_neon_vrev (d))
28190 return true;
28191 if (arm_evpc_neon_vtrn (d))
28192 return true;
28193 return arm_evpc_neon_vtbl (d);
28195 return false;
28198 /* Expand a vec_perm_const pattern. */
28200 bool
28201 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28203 struct expand_vec_perm_d d;
28204 int i, nelt, which;
28206 d.target = target;
28207 d.op0 = op0;
28208 d.op1 = op1;
28210 d.vmode = GET_MODE (target);
28211 gcc_assert (VECTOR_MODE_P (d.vmode));
28212 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28213 d.testing_p = false;
28215 for (i = which = 0; i < nelt; ++i)
28217 rtx e = XVECEXP (sel, 0, i);
28218 int ei = INTVAL (e) & (2 * nelt - 1);
28219 which |= (ei < nelt ? 1 : 2);
28220 d.perm[i] = ei;
28223 switch (which)
28225 default:
28226 gcc_unreachable();
28228 case 3:
28229 d.one_vector_p = false;
28230 if (!rtx_equal_p (op0, op1))
28231 break;
28233 /* The elements of PERM do not suggest that only the first operand
28234 is used, but both operands are identical. Allow easier matching
28235 of the permutation by folding the permutation into the single
28236 input vector. */
28237 /* FALLTHRU */
28238 case 2:
28239 for (i = 0; i < nelt; ++i)
28240 d.perm[i] &= nelt - 1;
28241 d.op0 = op1;
28242 d.one_vector_p = true;
28243 break;
28245 case 1:
28246 d.op1 = op0;
28247 d.one_vector_p = true;
28248 break;
28251 return arm_expand_vec_perm_const_1 (&d);
28254 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28256 static bool
28257 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28258 const unsigned char *sel)
28260 struct expand_vec_perm_d d;
28261 unsigned int i, nelt, which;
28262 bool ret;
28264 d.vmode = vmode;
28265 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28266 d.testing_p = true;
28267 memcpy (d.perm, sel, nelt);
28269 /* Categorize the set of elements in the selector. */
28270 for (i = which = 0; i < nelt; ++i)
28272 unsigned char e = d.perm[i];
28273 gcc_assert (e < 2 * nelt);
28274 which |= (e < nelt ? 1 : 2);
28277 /* For all elements from second vector, fold the elements to first. */
28278 if (which == 2)
28279 for (i = 0; i < nelt; ++i)
28280 d.perm[i] -= nelt;
28282 /* Check whether the mask can be applied to the vector type. */
28283 d.one_vector_p = (which != 3);
28285 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28286 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28287 if (!d.one_vector_p)
28288 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28290 start_sequence ();
28291 ret = arm_expand_vec_perm_const_1 (&d);
28292 end_sequence ();
28294 return ret;
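/* Return TRUE if the auto-increment/decrement addressing form CODE may
   profitably be used for accesses of mode MODE.  */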
28297 bool
28298 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28300 /* If we are soft float and either have ldrd or the access is no
28301 wider than a word, then all auto increment forms are ok. */
28302 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28303 return true;
28305 switch (code)
28307 /* Post-increment and pre-decrement are supported for all
28308 instruction forms except for vector forms. */
28309 case ARM_POST_INC:
28310 case ARM_PRE_DEC:
28311 if (VECTOR_MODE_P (mode))
28313 if (code != ARM_PRE_DEC)
28314 return true;
28315 else
28316 return false;
28319 return true;
28321 case ARM_POST_DEC:
28322 case ARM_PRE_INC:
28323 /* Without LDRD and mode size greater than
28324 word size, there is no point in auto-incrementing
28325 because ldm and stm will not have these forms. */
28326 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28327 return false;
28329 /* Vector and floating point modes do not support
28330 these auto increment forms. */
28331 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28332 return false;
28334 return true;
28336 default:
28337 return false;
28341 return false;
28344 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28345 on ARM, since we know that shifts by negative amounts are no-ops.
28346 Additionally, the default expansion code is not available or suitable
28347 for post-reload insn splits (this can occur when the register allocator
28348 chooses not to do a shift in NEON).
28350 This function is used in both initial expand and post-reload splits, and
28351 handles all kinds of 64-bit shifts.
28353 Input requirements:
28354 - It is safe for the input and output to be the same register, but
28355 early-clobber rules apply for the shift amount and scratch registers.
28356 - Shift by register requires both scratch registers. In all other cases
28357 the scratch registers may be NULL.
28358 - Ashiftrt by a register also clobbers the CC register. */
28359 void
28360 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28361 rtx amount, rtx scratch1, rtx scratch2)
28363 rtx out_high = gen_highpart (SImode, out);
28364 rtx out_low = gen_lowpart (SImode, out);
28365 rtx in_high = gen_highpart (SImode, in);
28366 rtx in_low = gen_lowpart (SImode, in);
28368 /* Terminology:
28369 in = the register pair containing the input value.
28370 out = the destination register pair.
28371 up = the high- or low-part of each pair.
28372 down = the opposite part to "up".
28373 In a shift, we can consider bits to shift from "up"-stream to
28374 "down"-stream, so in a left-shift "up" is the low-part and "down"
28375 is the high-part of each register pair. */
28377 rtx out_up = code == ASHIFT ? out_low : out_high;
28378 rtx out_down = code == ASHIFT ? out_high : out_low;
28379 rtx in_up = code == ASHIFT ? in_low : in_high;
28380 rtx in_down = code == ASHIFT ? in_high : in_low;
28382 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28383 gcc_assert (out
28384 && (REG_P (out) || GET_CODE (out) == SUBREG)
28385 && GET_MODE (out) == DImode);
28386 gcc_assert (in
28387 && (REG_P (in) || GET_CODE (in) == SUBREG)
28388 && GET_MODE (in) == DImode);
28389 gcc_assert (amount
28390 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28391 && GET_MODE (amount) == SImode)
28392 || CONST_INT_P (amount)));
28393 gcc_assert (scratch1 == NULL
28394 || (GET_CODE (scratch1) == SCRATCH)
28395 || (GET_MODE (scratch1) == SImode
28396 && REG_P (scratch1)));
28397 gcc_assert (scratch2 == NULL
28398 || (GET_CODE (scratch2) == SCRATCH)
28399 || (GET_MODE (scratch2) == SImode
28400 && REG_P (scratch2)));
28401 gcc_assert (!REG_P (out) || !REG_P (amount)
28402 || !HARD_REGISTER_P (out)
28403 || (REGNO (out) != REGNO (amount)
28404 && REGNO (out) + 1 != REGNO (amount)));
28406 /* Macros to make following code more readable. */
28407 #define SUB_32(DEST,SRC) \
28408 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28409 #define RSB_32(DEST,SRC) \
28410 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28411 #define SUB_S_32(DEST,SRC) \
28412 gen_addsi3_compare0 ((DEST), (SRC), \
28413 GEN_INT (-32))
28414 #define SET(DEST,SRC) \
28415 gen_rtx_SET (SImode, (DEST), (SRC))
28416 #define SHIFT(CODE,SRC,AMOUNT) \
28417 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28418 #define LSHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28420 SImode, (SRC), (AMOUNT))
28421 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28422 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28423 SImode, (SRC), (AMOUNT))
28424 #define ORR(A,B) \
28425 gen_rtx_IOR (SImode, (A), (B))
28426 #define BRANCH(COND,LABEL) \
28427 gen_arm_cond_branch ((LABEL), \
28428 gen_rtx_ ## COND (CCmode, cc_reg, \
28429 const0_rtx), \
28430 cc_reg)
28432 /* Shifts by register and shifts by constant are handled separately. */
28433 if (CONST_INT_P (amount))
28435 /* We have a shift-by-constant. */
28437 /* First, handle out-of-range shift amounts.
28438 In both cases we try to match the result an ARM instruction in a
28439 shift-by-register would give. This helps reduce execution
28440 differences between optimization levels, but it won't stop other
28441 parts of the compiler doing different things. This is "undefined"
28442 behaviour, in any case. */
28443 if (INTVAL (amount) <= 0)
28444 emit_insn (gen_movdi (out, in));
28445 else if (INTVAL (amount) >= 64)
28447 if (code == ASHIFTRT)
28449 rtx const31_rtx = GEN_INT (31);
28450 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28451 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28453 else
28454 emit_insn (gen_movdi (out, const0_rtx));
28457 /* Now handle valid shifts. */
28458 else if (INTVAL (amount) < 32)
28460 /* Shifts by a constant less than 32. */
28461 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28463 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28464 emit_insn (SET (out_down,
28465 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28466 out_down)));
28467 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28469 else
28471 /* Shifts by a constant greater than 31. */
28472 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28474 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28475 if (code == ASHIFTRT)
28476 emit_insn (gen_ashrsi3 (out_up, in_up,
28477 GEN_INT (31)));
28478 else
28479 emit_insn (SET (out_up, const0_rtx));
28482 else
28484 /* We have a shift-by-register. */
28485 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28487 /* This alternative requires the scratch registers. */
28488 gcc_assert (scratch1 && REG_P (scratch1));
28489 gcc_assert (scratch2 && REG_P (scratch2));
28491 /* We will need the values "amount-32" and "32-amount" later.
28492 Swapping them around now allows the later code to be more general. */
28493 switch (code)
28495 case ASHIFT:
28496 emit_insn (SUB_32 (scratch1, amount));
28497 emit_insn (RSB_32 (scratch2, amount));
28498 break;
28499 case ASHIFTRT:
28500 emit_insn (RSB_32 (scratch1, amount));
28501 /* Also set CC = amount > 32. */
28502 emit_insn (SUB_S_32 (scratch2, amount));
28503 break;
28504 case LSHIFTRT:
28505 emit_insn (RSB_32 (scratch1, amount));
28506 emit_insn (SUB_32 (scratch2, amount));
28507 break;
28508 default:
28509 gcc_unreachable ();
28512 /* Emit code like this:
28514 arithmetic-left:
28515 out_down = in_down << amount;
28516 out_down = (in_up << (amount - 32)) | out_down;
28517 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28518 out_up = in_up << amount;
28520 arithmetic-right:
28521 out_down = in_down >> amount;
28522 out_down = (in_up << (32 - amount)) | out_down;
28523 if (amount < 32)
28524 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28525 out_up = in_up << amount;
28527 logical-right:
28528 out_down = in_down >> amount;
28529 out_down = (in_up << (32 - amount)) | out_down;
28530 if (amount < 32)
28531 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28532 out_up = in_up << amount;
28534 The ARM and Thumb2 variants are the same but implemented slightly
28535 differently. If this were only called during expand we could just
28536 use the Thumb2 case and let combine do the right thing, but this
28537 can also be called from post-reload splitters. */
28539 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28541 if (!TARGET_THUMB2)
28543 /* Emit code for ARM mode. */
28544 emit_insn (SET (out_down,
28545 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28546 if (code == ASHIFTRT)
28548 rtx_code_label *done_label = gen_label_rtx ();
28549 emit_jump_insn (BRANCH (LT, done_label));
28550 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28551 out_down)));
28552 emit_label (done_label);
28554 else
28555 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28556 out_down)));
28558 else
28560 /* Emit code for Thumb2 mode.
28561 Thumb2 can't do shift and or in one insn. */
28562 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28563 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28565 if (code == ASHIFTRT)
28567 rtx_code_label *done_label = gen_label_rtx ();
28568 emit_jump_insn (BRANCH (LT, done_label));
28569 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28570 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28571 emit_label (done_label);
28573 else
28575 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28576 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28580 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28583 #undef SUB_32
28584 #undef RSB_32
28585 #undef SUB_S_32
28586 #undef SET
28587 #undef SHIFT
28588 #undef LSHIFT
28589 #undef REV_LSHIFT
28590 #undef ORR
28591 #undef BRANCH
28595 /* Return true if COMPARISON is a valid comparison operation, and put
28596 the operands into a form that is valid. */
28597 bool
28598 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28600 enum rtx_code code = GET_CODE (*comparison);
28601 int code_int;
28602 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28603 ? GET_MODE (*op2) : GET_MODE (*op1);
28605 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28607 if (code == UNEQ || code == LTGT)
28608 return false;
28610 code_int = (int)code;
28611 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28612 PUT_CODE (*comparison, (enum rtx_code)code_int);
28614 switch (mode)
28616 case SImode:
28617 if (!arm_add_operand (*op1, mode))
28618 *op1 = force_reg (mode, *op1);
28619 if (!arm_add_operand (*op2, mode))
28620 *op2 = force_reg (mode, *op2);
28621 return true;
28623 case DImode:
28624 if (!cmpdi_operand (*op1, mode))
28625 *op1 = force_reg (mode, *op1);
28626 if (!cmpdi_operand (*op2, mode))
28627 *op2 = force_reg (mode, *op2);
28628 return true;
28630 case SFmode:
28631 case DFmode:
28632 if (!arm_float_compare_operand (*op1, mode))
28633 *op1 = force_reg (mode, *op1);
28634 if (!arm_float_compare_operand (*op2, mode))
28635 *op2 = force_reg (mode, *op2);
28636 return true;
28637 default:
28638 break;
28641 return false;
28645 /* Maximum number of instructions to set block of memory. */
28646 static int
28647 arm_block_set_max_insns (void)
28649 if (optimize_function_for_size_p (cfun))
28650 return 4;
28651 else
28652 return current_tune->max_insns_inline_memset;
28655 /* Return TRUE if it's profitable to set a block of memory in the
28656 non-vectorized case. VAL is the value to set the memory
28657 with. LENGTH is the number of bytes to set. ALIGN is the
28658 alignment of the destination memory in bytes. UNALIGNED_P
28659 is TRUE if we can only set the memory with instructions
28660 meeting alignment requirements. USE_STRD_P is TRUE if we
28661 can use strd to set the memory. */
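/* For example, with a word-aligned destination, no strd and LENGTH == 14,
   the estimate is the cost of loading VAL plus (14 >> 2) word stores plus
   leftover[14 & 3] == 1 extra store; the block is then expanded inline
   only if that total does not exceed arm_block_set_max_insns ().  */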
28662 static bool
28663 arm_block_set_non_vect_profit_p (rtx val,
28664 unsigned HOST_WIDE_INT length,
28665 unsigned HOST_WIDE_INT align,
28666 bool unaligned_p, bool use_strd_p)
28668 int num = 0;
28669 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28670 strb/strh/str instructions needed to store it. */
28671 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28673 if (unaligned_p)
28675 num = arm_const_inline_cost (SET, val);
28676 num += length / align + length % align;
28678 else if (use_strd_p)
28680 num = arm_const_double_inline_cost (val);
28681 num += (length >> 3) + leftover[length & 7];
28683 else
28685 num = arm_const_inline_cost (SET, val);
28686 num += (length >> 2) + leftover[length & 3];
28689 /* We may be able to combine last pair STRH/STRB into a single STR
28690 by shifting one byte back. */
28691 if (unaligned_access && length > 3 && (length & 3) == 3)
28692 num--;
28694 return (num <= arm_block_set_max_insns ());
28697 /* Return TRUE if it's profitable to set a block of memory in the
28698 vectorized case. LENGTH is the number of bytes to set.
28699 ALIGN is the alignment of the destination memory in bytes.
28700 MODE is the vector mode used to set the memory. */
28701 static bool
28702 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28703 unsigned HOST_WIDE_INT align,
28704 machine_mode mode)
28706 int num;
28707 bool unaligned_p = ((align & 3) != 0);
28708 unsigned int nelt = GET_MODE_NUNITS (mode);
28710 /* Instruction loading constant value. */
28711 num = 1;
28712 /* Instructions storing the memory. */
28713 num += (length + nelt - 1) / nelt;
28714 /* Instructions adjusting the address expression. Only need to
28715 adjust address expression if it's 4 bytes aligned and bytes
28716 leftover can only be stored by mis-aligned store instruction. */
28717 if (!unaligned_p && (length & 3) != 0)
28718 num++;
28720 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28721 if (!unaligned_p && mode == V16QImode)
28722 num--;
28724 return (num <= arm_block_set_max_insns ());
28727 /* Set a block of memory using vectorization instructions for the
28728 unaligned case. We fill the first LENGTH bytes of the memory
28729 area starting from DSTBASE with byte constant VALUE. ALIGN is
28730 the alignment requirement of memory. Return TRUE if succeeded. */
28731 static bool
28732 arm_block_set_unaligned_vect (rtx dstbase,
28733 unsigned HOST_WIDE_INT length,
28734 unsigned HOST_WIDE_INT value,
28735 unsigned HOST_WIDE_INT align)
28737 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28738 rtx dst, mem;
28739 rtx val_elt, val_vec, reg;
28740 rtx rval[MAX_VECT_LEN];
28741 rtx (*gen_func) (rtx, rtx);
28742 machine_mode mode;
28743 unsigned HOST_WIDE_INT v = value;
28745 gcc_assert ((align & 0x3) != 0);
28746 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28747 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28748 if (length >= nelt_v16)
28750 mode = V16QImode;
28751 gen_func = gen_movmisalignv16qi;
28753 else
28755 mode = V8QImode;
28756 gen_func = gen_movmisalignv8qi;
28758 nelt_mode = GET_MODE_NUNITS (mode);
28759 gcc_assert (length >= nelt_mode);
28760 /* Skip if it isn't profitable. */
28761 if (!arm_block_set_vect_profit_p (length, align, mode))
28762 return false;
28764 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28765 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28767 v = sext_hwi (v, BITS_PER_WORD);
28768 val_elt = GEN_INT (v);
28769 for (j = 0; j < nelt_mode; j++)
28770 rval[j] = val_elt;
28772 reg = gen_reg_rtx (mode);
28773 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28774 /* Emit instruction loading the constant value. */
28775 emit_move_insn (reg, val_vec);
28777 /* Handle nelt_mode bytes in a vector. */
28778 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28780 emit_insn ((*gen_func) (mem, reg));
28781 if (i + 2 * nelt_mode <= length)
28782 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28785 /* If at least nelt_v8 bytes are left over, we must be in
28786 V16QImode. */
28787 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28789 /* Handle (8, 16) bytes leftover. */
28790 if (i + nelt_v8 < length)
28792 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28793 /* We are shifting bytes back, set the alignment accordingly. */
28794 if ((length & 1) != 0 && align >= 2)
28795 set_mem_align (mem, BITS_PER_UNIT);
28797 emit_insn (gen_movmisalignv16qi (mem, reg));
28799 /* Handle (0, 8] bytes leftover. */
28800 else if (i < length && i + nelt_v8 >= length)
28802 if (mode == V16QImode)
28804 reg = gen_lowpart (V8QImode, reg);
28805 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28807 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28808 + (nelt_mode - nelt_v8))));
28809 /* We are shifting bytes back, set the alignment accordingly. */
28810 if ((length & 1) != 0 && align >= 2)
28811 set_mem_align (mem, BITS_PER_UNIT);
28813 emit_insn (gen_movmisalignv8qi (mem, reg));
28816 return true;
28819 /* Set a block of memory using vectorization instructions for the
28820 aligned case. We fill the first LENGTH bytes of the memory area
28821 starting from DSTBASE with byte constant VALUE. ALIGN is the
28822 alignment requirement of memory. Return TRUE if succeeded. */
28823 static bool
28824 arm_block_set_aligned_vect (rtx dstbase,
28825 unsigned HOST_WIDE_INT length,
28826 unsigned HOST_WIDE_INT value,
28827 unsigned HOST_WIDE_INT align)
28829 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28830 rtx dst, addr, mem;
28831 rtx val_elt, val_vec, reg;
28832 rtx rval[MAX_VECT_LEN];
28833 machine_mode mode;
28834 unsigned HOST_WIDE_INT v = value;
28836 gcc_assert ((align & 0x3) == 0);
28837 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28838 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28839 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28840 mode = V16QImode;
28841 else
28842 mode = V8QImode;
28844 nelt_mode = GET_MODE_NUNITS (mode);
28845 gcc_assert (length >= nelt_mode);
28846 /* Skip if it isn't profitable. */
28847 if (!arm_block_set_vect_profit_p (length, align, mode))
28848 return false;
28850 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28852 v = sext_hwi (v, BITS_PER_WORD);
28853 val_elt = GEN_INT (v);
28854 for (j = 0; j < nelt_mode; j++)
28855 rval[j] = val_elt;
28857 reg = gen_reg_rtx (mode);
28858 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28859 /* Emit instruction loading the constant value. */
28860 emit_move_insn (reg, val_vec);
28862 i = 0;
28863 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28864 if (mode == V16QImode)
28866 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28867 emit_insn (gen_movmisalignv16qi (mem, reg));
28868 i += nelt_mode;
28869 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28870 if (i + nelt_v8 < length && i + nelt_v16 > length)
28872 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28873 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28874 /* We are shifting bytes back, set the alignment accordingly. */
28875 if ((length & 0x3) == 0)
28876 set_mem_align (mem, BITS_PER_UNIT * 4);
28877 else if ((length & 0x1) == 0)
28878 set_mem_align (mem, BITS_PER_UNIT * 2);
28879 else
28880 set_mem_align (mem, BITS_PER_UNIT);
28882 emit_insn (gen_movmisalignv16qi (mem, reg));
28883 return true;
28885 /* Fall through for bytes leftover. */
28886 mode = V8QImode;
28887 nelt_mode = GET_MODE_NUNITS (mode);
28888 reg = gen_lowpart (V8QImode, reg);
28891 /* Handle 8 bytes in a vector. */
28892 for (; (i + nelt_mode <= length); i += nelt_mode)
28894 addr = plus_constant (Pmode, dst, i);
28895 mem = adjust_automodify_address (dstbase, mode, addr, i);
28896 emit_move_insn (mem, reg);
28899 /* Handle single word leftover by shifting 4 bytes back. We can
28900 use aligned access for this case. */
28901 if (i + UNITS_PER_WORD == length)
28903 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28904 mem = adjust_automodify_address (dstbase, mode,
28905 addr, i - UNITS_PER_WORD);
28906 /* We are shifting 4 bytes back, set the alignment accordingly. */
28907 if (align > UNITS_PER_WORD)
28908 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28910 emit_move_insn (mem, reg);
28912 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28913 We have to use unaligned access for this case. */
28914 else if (i < length)
28916 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28917 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28918 /* We are shifting bytes back, set the alignment accordingly. */
28919 if ((length & 1) == 0)
28920 set_mem_align (mem, BITS_PER_UNIT * 2);
28921 else
28922 set_mem_align (mem, BITS_PER_UNIT);
28924 emit_insn (gen_movmisalignv8qi (mem, reg));
28927 return true;
28930 /* Set a block of memory using plain strh/strb instructions, only
28931 using instructions allowed by ALIGN on the processor. We fill the
28932 first LENGTH bytes of the memory area starting from DSTBASE
28933 with byte constant VALUE. ALIGN is the alignment requirement
28934 of memory. */
28935 static bool
28936 arm_block_set_unaligned_non_vect (rtx dstbase,
28937 unsigned HOST_WIDE_INT length,
28938 unsigned HOST_WIDE_INT value,
28939 unsigned HOST_WIDE_INT align)
28941 unsigned int i;
28942 rtx dst, addr, mem;
28943 rtx val_exp, val_reg, reg;
28944 machine_mode mode;
28945 HOST_WIDE_INT v = value;
28947 gcc_assert (align == 1 || align == 2);
28949 if (align == 2)
28950 v |= (value << BITS_PER_UNIT);
28952 v = sext_hwi (v, BITS_PER_WORD);
28953 val_exp = GEN_INT (v);
28954 /* Skip if it isn't profitable. */
28955 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28956 align, true, false))
28957 return false;
28959 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28960 mode = (align == 2 ? HImode : QImode);
28961 val_reg = force_reg (SImode, val_exp);
28962 reg = gen_lowpart (mode, val_reg);
28964 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28966 addr = plus_constant (Pmode, dst, i);
28967 mem = adjust_automodify_address (dstbase, mode, addr, i);
28968 emit_move_insn (mem, reg);
28971 /* Handle single byte leftover. */
28972 if (i + 1 == length)
28974 reg = gen_lowpart (QImode, val_reg);
28975 addr = plus_constant (Pmode, dst, i);
28976 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28977 emit_move_insn (mem, reg);
28978 i++;
28981 gcc_assert (i == length);
28982 return true;
28985 /* Set a block of memory using plain strd/str/strh/strb instructions,
28986 to permit unaligned copies on processors which support unaligned
28987 semantics for those instructions. We fill the first LENGTH bytes
28988 of the memory area starting from DSTBASE with byte constant VALUE.
28989 ALIGN is the alignment requirement of memory. */
28990 static bool
28991 arm_block_set_aligned_non_vect (rtx dstbase,
28992 unsigned HOST_WIDE_INT length,
28993 unsigned HOST_WIDE_INT value,
28994 unsigned HOST_WIDE_INT align)
28996 unsigned int i;
28997 rtx dst, addr, mem;
28998 rtx val_exp, val_reg, reg;
28999 unsigned HOST_WIDE_INT v;
29000 bool use_strd_p;
29002 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29003 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29005 v = (value | (value << 8) | (value << 16) | (value << 24));
29006 if (length < UNITS_PER_WORD)
29007 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29009 if (use_strd_p)
29010 v |= (v << BITS_PER_WORD);
29011 else
29012 v = sext_hwi (v, BITS_PER_WORD);
29014 val_exp = GEN_INT (v);
29015 /* Skip if it isn't profitable. */
29016 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29017 align, false, use_strd_p))
29019 if (!use_strd_p)
29020 return false;
29022 /* Try without strd. */
29023 v = (v >> BITS_PER_WORD);
29024 v = sext_hwi (v, BITS_PER_WORD);
29025 val_exp = GEN_INT (v);
29026 use_strd_p = false;
29027 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29028 align, false, use_strd_p))
29029 return false;
29032 i = 0;
29033 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29034 /* Handle double words using strd if possible. */
29035 if (use_strd_p)
29037 val_reg = force_reg (DImode, val_exp);
29038 reg = val_reg;
29039 for (; (i + 8 <= length); i += 8)
29041 addr = plus_constant (Pmode, dst, i);
29042 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29043 emit_move_insn (mem, reg);
29046 else
29047 val_reg = force_reg (SImode, val_exp);
29049 /* Handle words. */
29050 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29051 for (; (i + 4 <= length); i += 4)
29053 addr = plus_constant (Pmode, dst, i);
29054 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29055 if ((align & 3) == 0)
29056 emit_move_insn (mem, reg);
29057 else
29058 emit_insn (gen_unaligned_storesi (mem, reg));
29061 /* Merge last pair of STRH and STRB into a STR if possible. */
29062 if (unaligned_access && i > 0 && (i + 3) == length)
29064 addr = plus_constant (Pmode, dst, i - 1);
29065 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29066 /* We are shifting one byte back, set the alignment accordingly. */
29067 if ((align & 1) == 0)
29068 set_mem_align (mem, BITS_PER_UNIT);
29070 /* Most likely this is an unaligned access, and we can't tell at
29071 compilation time. */
29072 emit_insn (gen_unaligned_storesi (mem, reg));
29073 return true;
29076 /* Handle half word leftover. */
29077 if (i + 2 <= length)
29079 reg = gen_lowpart (HImode, val_reg);
29080 addr = plus_constant (Pmode, dst, i);
29081 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29082 if ((align & 1) == 0)
29083 emit_move_insn (mem, reg);
29084 else
29085 emit_insn (gen_unaligned_storehi (mem, reg));
29087 i += 2;
29090 /* Handle single byte leftover. */
29091 if (i + 1 == length)
29093 reg = gen_lowpart (QImode, val_reg);
29094 addr = plus_constant (Pmode, dst, i);
29095 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29096 emit_move_insn (mem, reg);
29099 return true;
29102 /* Set a block of memory using vectorization instructions for both
29103 aligned and unaligned cases. We fill the first LENGTH bytes of
29104 the memory area starting from DSTBASE with byte constant VALUE.
29105 ALIGN is the alignment requirement of memory. */
29106 static bool
29107 arm_block_set_vect (rtx dstbase,
29108 unsigned HOST_WIDE_INT length,
29109 unsigned HOST_WIDE_INT value,
29110 unsigned HOST_WIDE_INT align)
29112 /* Check whether we need to use unaligned store instructions. */
29113 if (((align & 3) != 0 || (length & 3) != 0)
29114 /* Check whether unaligned store instructions are available. */
29115 && (!unaligned_access || BYTES_BIG_ENDIAN))
29116 return false;
29118 if ((align & 3) == 0)
29119 return arm_block_set_aligned_vect (dstbase, length, value, align);
29120 else
29121 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29124 /* Expand a string store (memory set) operation. First we try to do it
29125 using vectorization instructions, then try ARM unaligned access and
29126 double-word stores if profitable. OPERANDS[0] is the destination,
29127 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29128 initialize the memory with, OPERANDS[3] is the known alignment of the
29129 destination. */
29130 bool
29131 arm_gen_setmem (rtx *operands)
29133 rtx dstbase = operands[0];
29134 unsigned HOST_WIDE_INT length;
29135 unsigned HOST_WIDE_INT value;
29136 unsigned HOST_WIDE_INT align;
29138 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29139 return false;
29141 length = UINTVAL (operands[1]);
29142 if (length > 64)
29143 return false;
29145 value = (UINTVAL (operands[2]) & 0xFF);
29146 align = UINTVAL (operands[3]);
29147 if (TARGET_NEON && length >= 8
29148 && current_tune->string_ops_prefer_neon
29149 && arm_block_set_vect (dstbase, length, value, align))
29150 return true;
29152 if (!unaligned_access && (align & 3) != 0)
29153 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29155 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29158 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29160 static unsigned HOST_WIDE_INT
29161 arm_asan_shadow_offset (void)
29163 return (unsigned HOST_WIDE_INT) 1 << 29;
29167 /* This is a temporary fix for PR60655. Ideally we need
29168 to handle most of these cases in the generic part but
29169 currently we reject minus (..) (sym_ref). We try to
29170 ameliorate the case with minus (sym_ref1) (sym_ref2)
29171 where they are in the same section. */
29173 static bool
29174 arm_const_not_ok_for_debug_p (rtx p)
29176 tree decl_op0 = NULL;
29177 tree decl_op1 = NULL;
29179 if (GET_CODE (p) == MINUS)
29181 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29183 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29184 if (decl_op1
29185 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29186 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29188 if ((TREE_CODE (decl_op1) == VAR_DECL
29189 || TREE_CODE (decl_op1) == CONST_DECL)
29190 && (TREE_CODE (decl_op0) == VAR_DECL
29191 || TREE_CODE (decl_op0) == CONST_DECL))
29192 return (get_variable_section (decl_op1, false)
29193 != get_variable_section (decl_op0, false));
29195 if (TREE_CODE (decl_op1) == LABEL_DECL
29196 && TREE_CODE (decl_op0) == LABEL_DECL)
29197 return (DECL_CONTEXT (decl_op1)
29198 != DECL_CONTEXT (decl_op0));
29201 return true;
29205 return false;
29208 /* Return TRUE if X is a reference to a value in a constant pool. */
29209 extern bool
29210 arm_is_constant_pool_ref (rtx x)
29212 return (MEM_P (x)
29213 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29214 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29217 /* If MEM is in the form [base+offset], extract the two parts of the
29218 address and store them in BASE and OFFSET; otherwise return false
29219 after clearing BASE and OFFSET. */
29221 static bool
29222 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29224 rtx addr;
29226 gcc_assert (MEM_P (mem));
29228 addr = XEXP (mem, 0);
29230 /* Strip off const from addresses like (const (addr)). */
29231 if (GET_CODE (addr) == CONST)
29232 addr = XEXP (addr, 0);
29234 if (GET_CODE (addr) == REG)
29236 *base = addr;
29237 *offset = const0_rtx;
29238 return true;
29241 if (GET_CODE (addr) == PLUS
29242 && GET_CODE (XEXP (addr, 0)) == REG
29243 && CONST_INT_P (XEXP (addr, 1)))
29245 *base = XEXP (addr, 0);
29246 *offset = XEXP (addr, 1);
29247 return true;
29250 *base = NULL_RTX;
29251 *offset = NULL_RTX;
29253 return false;
29256 /* If INSN is a load or store whose address is of the form [base+offset],
29257 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29258 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
29259 otherwise return FALSE. */
29261 static bool
29262 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29264 rtx x, dest, src;
29266 gcc_assert (INSN_P (insn));
29267 x = PATTERN (insn);
29268 if (GET_CODE (x) != SET)
29269 return false;
29271 src = SET_SRC (x);
29272 dest = SET_DEST (x);
29273 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29275 *is_load = false;
29276 extract_base_offset_in_addr (dest, base, offset);
29278 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29280 *is_load = true;
29281 extract_base_offset_in_addr (src, base, offset);
29283 else
29284 return false;
29286 return (*base != NULL_RTX && *offset != NULL_RTX);
29289 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29291 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29292 and PRI are only calculated for these instructions. For other instructions,
29293 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29294 instruction fusion can be supported by returning different priorities.
29296 It's important that irrelevant instructions get the largest FUSION_PRI. */
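/* The effect is that loads and stores off the same base register get
   nearby priorities, ordered by offset, so e.g. an ldr from [rN, #4] is
   scheduled just before an ldr from [rN, #8] and the pair becomes a
   candidate for fusion.  */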
29298 static void
29299 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29300 int *fusion_pri, int *pri)
29302 int tmp, off_val;
29303 bool is_load;
29304 rtx base, offset;
29306 gcc_assert (INSN_P (insn));
29308 tmp = max_pri - 1;
29309 if (!fusion_load_store (insn, &base, &offset, &is_load))
29311 *pri = tmp;
29312 *fusion_pri = tmp;
29313 return;
29316 /* Load goes first. */
29317 if (is_load)
29318 *fusion_pri = tmp - 1;
29319 else
29320 *fusion_pri = tmp - 2;
29322 tmp /= 2;
29324 /* INSN with smaller base register goes first. */
29325 tmp -= ((REGNO (base) & 0xff) << 20);
29327 /* INSN with smaller offset goes first. */
29328 off_val = (int)(INTVAL (offset));
29329 if (off_val >= 0)
29330 tmp -= (off_val & 0xfffff);
29331 else
29332 tmp += ((- off_val) & 0xfffff);
29334 *pri = tmp;
29335 return;
29337 #include "gt-arm.h"